{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"8SVM-下 - 案例部分源码-未处理地点.ipynb","version":"0.3.2","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"metadata":{"id":"PeCgeTCDRGv_","colab_type":"text"},"cell_type":"markdown","source":["# 导库导数据，探索特征"]},{"metadata":{"id":"HqheAv7UJGtA","colab_type":"code","colab":{}},"cell_type":"code","source":["import pandas as pd\n","import numpy as np\n","from sklearn.model_selection import train_test_split"],"execution_count":0,"outputs":[]},{"metadata":{"id":"dn5uDt8UJGtL","colab_type":"code","colab":{}},"cell_type":"code","source":["weather = pd.read_csv(r\"weatherAUS5000.csv\",index_col=0)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"HPCOiTLJJGtR","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":338},"outputId":"fb3b82fa-e6f8-4dde-9c23-4a4c91f07d92","executionInfo":{"status":"ok","timestamp":1546761651648,"user_tz":-480,"elapsed":714,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["weather.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Date</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      
<th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainTomorrow</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>2015-03-24</td>\n","      <td>Adelaide</td>\n","      <td>12.3</td>\n","      <td>19.3</td>\n","      <td>0.0</td>\n","      <td>5.0</td>\n","      <td>NaN</td>\n","      <td>S</td>\n","      <td>39.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>59.0</td>\n","      <td>47.0</td>\n","      <td>1022.2</td>\n","      <td>1021.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>15.1</td>\n","      <td>17.7</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>2011-07-12</td>\n","      <td>Adelaide</td>\n","      <td>7.9</td>\n","      <td>11.4</td>\n","      <td>0.0</td>\n","      <td>1.0</td>\n","      <td>0.5</td>\n","      <td>N</td>\n","      <td>20.0</td>\n","      <td>NNE</td>\n","      <td>...</td>\n","      <td>7.0</td>\n","      <td>70.0</td>\n","      <td>59.0</td>\n","      <td>1028.7</td>\n","      <td>1025.7</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>8.4</td>\n","      <td>11.3</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>2010-02-08</td>\n","      <td>Adelaide</td>\n","      <td>24.0</td>\n","      <td>38.1</td>\n","      <td>0.0</td>\n","      <td>23.4</td>\n","      <td>13.0</td>\n","      <td>SE</td>\n","      <td>39.0</td>\n","      <td>NNE</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>36.0</td>\n","      <td>24.0</td>\n","      <td>1018.0</td>\n","      <td>1016.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>32.4</td>\n","      <td>37.4</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","   
   <td>2016-09-19</td>\n","      <td>Adelaide</td>\n","      <td>6.7</td>\n","      <td>16.4</td>\n","      <td>0.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>N</td>\n","      <td>31.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>65.0</td>\n","      <td>40.0</td>\n","      <td>1014.4</td>\n","      <td>1010.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>11.2</td>\n","      <td>15.9</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>2014-03-05</td>\n","      <td>Adelaide</td>\n","      <td>16.7</td>\n","      <td>24.8</td>\n","      <td>0.0</td>\n","      <td>6.6</td>\n","      <td>11.7</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>24.0</td>\n","      <td>61.0</td>\n","      <td>48.0</td>\n","      <td>1019.3</td>\n","      <td>1018.9</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>20.8</td>\n","      <td>23.7</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["         Date  Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0  2015-03-24  Adelaide     12.3     19.3       0.0          5.0       NaN   \n","1  2011-07-12  Adelaide      7.9     11.4       0.0          1.0       0.5   \n","2  2010-02-08  Adelaide     24.0     38.1       0.0         23.4      13.0   \n","3  2016-09-19  Adelaide      6.7     16.4       0.4          NaN       NaN   \n","4  2014-03-05  Adelaide     16.7     24.8       0.0          6.6      11.7   \n","\n","  WindGustDir  WindGustSpeed WindDir9am      ...      WindSpeed3pm  \\\n","0           S           39.0          S      ...              19.0   \n","1           N           20.0        NNE      ...               7.0   \n","2          SE           39.0        NNE      ...              19.0   \n","3           N           31.0          N      ...       
       15.0   \n","4           S           37.0          S      ...              24.0   \n","\n","   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n","0         59.0         47.0       1022.2       1021.4       NaN       NaN   \n","1         70.0         59.0       1028.7       1025.7       NaN       NaN   \n","2         36.0         24.0       1018.0       1016.0       NaN       NaN   \n","3         65.0         40.0       1014.4       1010.0       NaN       NaN   \n","4         61.0         48.0       1019.3       1018.9       NaN       NaN   \n","\n","   Temp9am  Temp3pm  RainTomorrow  \n","0     15.1     17.7            No  \n","1      8.4     11.3            No  \n","2     32.4     37.4            No  \n","3     11.2     15.9            No  \n","4     20.8     23.7            No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"metadata":{"id":"jOjE-29tJGta","colab_type":"code","colab":{}},"cell_type":"code","source":["#将特征矩阵和标签Y分开\n","X = weather.iloc[:,:-1]\n","Y = weather.iloc[:,-1]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"itMoBsYPJGtk","colab_type":"code","colab":{}},"cell_type":"code","source":["#分裂的快捷键：ctrl shift -\n","\n","#合并的快捷键：shift M"],"execution_count":0,"outputs":[]},{"metadata":{"id":"8-xZ99MXJGtw","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"1bf27b94-244f-4d17-fa09-bee1a797250b","executionInfo":{"status":"ok","timestamp":1546761659991,"user_tz":-480,"elapsed":752,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["X.shape #5000行是我随机选的"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(5000, 
21)"]},"metadata":{"tags":[]},"execution_count":5}]},{"metadata":{"id":"Lpx25hbrJGt5","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":459},"outputId":"fc4805e1-8707-461e-e8e5-7ba7be5854e1","executionInfo":{"status":"ok","timestamp":1546761738274,"user_tz":-480,"elapsed":716,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["#探索数据类型\n","X.info()"],"execution_count":6,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 5000 entries, 0 to 4999\n","Data columns (total 21 columns):\n","Date             5000 non-null object\n","Location         5000 non-null object\n","MinTemp          4979 non-null float64\n","MaxTemp          4987 non-null float64\n","Rainfall         4950 non-null float64\n","Evaporation      2841 non-null float64\n","Sunshine         2571 non-null float64\n","WindGustDir      4669 non-null object\n","WindGustSpeed    4669 non-null float64\n","WindDir9am       4651 non-null object\n","WindDir3pm       4887 non-null object\n","WindSpeed9am     4949 non-null float64\n","WindSpeed3pm     4919 non-null float64\n","Humidity9am      4936 non-null float64\n","Humidity3pm      4880 non-null float64\n","Pressure9am      4506 non-null float64\n","Pressure3pm      4504 non-null float64\n","Cloud9am         3111 non-null float64\n","Cloud3pm         3012 non-null float64\n","Temp9am          4967 non-null float64\n","Temp3pm          4912 non-null float64\n","dtypes: float64(16), object(5)\n","memory usage: 859.4+ KB\n"],"name":"stdout"}]},{"metadata":{"id":"1iHNfydVJGuC","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":391},"outputId":"ad2f6ce2-1b32-4414-bbe5-1680c748dc72","executionInfo":{"status":"ok","timestamp":1546761744693,"user_tz":-480,"elapsed":706,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["#探索缺失值\n","X.isnull().mean() 
#缺失值所占总值的比例 isnull().sum(全部的True)/X.shape[0]\n","#我们要有不同的缺失值填补策略"],"execution_count":7,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Date             0.0000\n","Location         0.0000\n","MinTemp          0.0042\n","MaxTemp          0.0026\n","Rainfall         0.0100\n","Evaporation      0.4318\n","Sunshine         0.4858\n","WindGustDir      0.0662\n","WindGustSpeed    0.0662\n","WindDir9am       0.0698\n","WindDir3pm       0.0226\n","WindSpeed9am     0.0102\n","WindSpeed3pm     0.0162\n","Humidity9am      0.0128\n","Humidity3pm      0.0240\n","Pressure9am      0.0988\n","Pressure3pm      0.0992\n","Cloud9am         0.3778\n","Cloud3pm         0.3976\n","Temp9am          0.0066\n","Temp3pm          0.0176\n","dtype: float64"]},"metadata":{"tags":[]},"execution_count":7}]},{"metadata":{"id":"dlnYcQHRJGuN","colab_type":"code","colab":{}},"cell_type":"code","source":["#在上方添加一个新的cell ESC a enter"],"execution_count":0,"outputs":[]},{"metadata":{"id":"hrP2PM_3JGuT","colab_type":"code","colab":{}},"cell_type":"code","source":["#在下方添加一个新的cell ESC b enter"],"execution_count":0,"outputs":[]},{"metadata":{"id":"vkoG0MvIJGu6","colab_type":"code","colab":{}},"cell_type":"code","source":["#删除一个cell ESC d d"],"execution_count":0,"outputs":[]},{"metadata":{"id":"_HjB3B6DRB9K","colab_type":"text"},"cell_type":"markdown","source":["# 分集，优先探索标签"]},{"metadata":{"id":"amA4CV0rJGu-","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"33cebd80-891b-4d56-f88b-1f4fe01637c0","executionInfo":{"status":"ok","timestamp":1546761894878,"user_tz":-480,"elapsed":728,"user":{"displayName":"Sen 
Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Y.shape"],"execution_count":8,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(5000,)"]},"metadata":{"tags":[]},"execution_count":8}]},{"metadata":{"id":"E2aTsn0MJGvE","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"42a5e028-0213-436f-adcb-ee84cc900e65","executionInfo":{"status":"ok","timestamp":1546761899926,"user_tz":-480,"elapsed":1696,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Y.isnull().sum() #加和的时候，True是1，False是0"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0"]},"metadata":{"tags":[]},"execution_count":9}]},{"metadata":{"id":"LDFm0wfPP2iR","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"a69e1547-9ece-4a22-a177-4d114471d3d2","executionInfo":{"status":"ok","timestamp":1546762822104,"user_tz":-480,"elapsed":2341,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["type(Y)"],"execution_count":23,"outputs":[{"output_type":"execute_result","data":{"text/plain":["pandas.core.series.Series"]},"metadata":{"tags":[]},"execution_count":23}]},{"metadata":{"id":"APUpqTx-QePi","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"87e19b75-c2ca-4968-964a-c2f5531477b6","executionInfo":{"status":"ok","timestamp":1546762982247,"user_tz":-480,"elapsed":686,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Y.unique()"],"execution_count":24,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['No', 'Yes'], 
dtype=object)"]},"metadata":{"tags":[]},"execution_count":24}]},{"metadata":{"id":"M7AzOmClJGvP","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"6df51b8c-c12e-430e-d3a0-a1c47845fba8","executionInfo":{"status":"ok","timestamp":1546762003889,"user_tz":-480,"elapsed":706,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["#探索标签的分类\n","np.unique(Y) #我们的标签是二分类"],"execution_count":10,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['No', 'Yes'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":10}]},{"metadata":{"id":"7wC3yvWtJGvX","colab_type":"code","colab":{}},"cell_type":"code","source":["#分训练集和测试集\n","Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,Y,test_size=0.3,random_state=420) #随机抽样"],"execution_count":0,"outputs":[]},{"metadata":{"id":"9VSyWCFuJGvb","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":338},"outputId":"1e0bd1d2-cd4c-4e3c-d25f-caac65fabc0e","executionInfo":{"status":"ok","timestamp":1546762158350,"user_tz":-480,"elapsed":699,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.head()"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Date</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      
<th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed9am</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>1809</th>\n","      <td>2015-08-24</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","    </tr>\n","    <tr>\n","      <th>4176</th>\n","      <td>2016-12-10</td>\n","      <td>Tuggeranong</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>7.0</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","    </tr>\n","    <tr>\n","      <th>110</th>\n","      <td>2010-04-18</td>\n","      <td>Albany</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      
<td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","    </tr>\n","    <tr>\n","      <th>3582</th>\n","      <td>2009-11-26</td>\n","      <td>Sale</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>11.0</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","    </tr>\n","    <tr>\n","      <th>2162</th>\n","      <td>2014-04-25</td>\n","      <td>Mildura</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 21 columns</p>\n","</div>"],"text/plain":["            Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  \\\n","1809  2015-08-24    Katherine     17.5     36.0       0.0          8.8   \n","4176  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN   \n","110   2010-04-18       Albany     13.0     22.6       0.0          3.8   \n","3582  2009-11-26         Sale     13.9     29.8       0.0          5.8   \n","2162  2014-04-25      Mildura      6.0     23.5       0.0          2.8   \n","\n","      Sunshine WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  \\\n","1809       NaN         ESE           26.0        NNW   ...            17.0   \n","4176       NaN         NNW           33.0         NE   ...        
     7.0   \n","110       10.4         NaN            NaN         NE   ...            17.0   \n","3582       5.1           S           37.0          N   ...            11.0   \n","2162       8.6         NNE           24.0          E   ...            15.0   \n","\n","      WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  \\\n","1809          15.0         57.0          NaN       1016.8       1012.2   \n","4176          17.0         59.0         31.0       1020.4       1017.5   \n","110           31.0         79.0         68.0       1020.3       1015.7   \n","3582          28.0         82.0         44.0       1012.5       1005.9   \n","2162          15.0         58.0         35.0       1019.8       1014.1   \n","\n","      Cloud9am  Cloud3pm  Temp9am  Temp3pm  \n","1809       0.0       NaN     27.5      NaN  \n","4176       NaN       NaN     14.6     23.6  \n","110        1.0       3.0     17.5     20.8  \n","3582       6.0       6.0     18.5     27.5  \n","2162       2.0       4.0     12.4     22.4  \n","\n","[5 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"metadata":{"id":"dtcw0Ps8JGvf","colab_type":"code","colab":{}},"cell_type":"code","source":["#恢复索引\n","for i in [Xtrain, Xtest, Ytrain, Ytest]:\n","    i.index = range(i.shape[0])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"tIp04WuLJGvm","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":338},"outputId":"039984a0-a20d-461f-be4e-554f1d4f43c0","executionInfo":{"status":"ok","timestamp":1546762205192,"user_tz":-480,"elapsed":782,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.head()"],"execution_count":14,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    
}\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Date</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed9am</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>2015-08-24</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>2016-12-10</td>\n","      <td>Tuggeranong</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>7.0</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","    </tr>\n","    <tr>\n","     
 <th>2</th>\n","      <td>2010-04-18</td>\n","      <td>Albany</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>2009-11-26</td>\n","      <td>Sale</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>11.0</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>2014-04-25</td>\n","      <td>Mildura</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 21 columns</p>\n","</div>"],"text/plain":["         Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0  2015-08-24    Katherine     17.5     36.0       0.0          8.8       NaN   \n","1  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n","2  2010-04-18       
Albany     13.0     22.6       0.0          3.8      10.4   \n","3  2009-11-26         Sale     13.9     29.8       0.0          5.8       5.1   \n","4  2014-04-25      Mildura      6.0     23.5       0.0          2.8       8.6   \n","\n","  WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  WindSpeed3pm  \\\n","0         ESE           26.0        NNW   ...            17.0          15.0   \n","1         NNW           33.0         NE   ...             7.0          17.0   \n","2         NaN            NaN         NE   ...            17.0          31.0   \n","3           S           37.0          N   ...            11.0          28.0   \n","4         NNE           24.0          E   ...            15.0          15.0   \n","\n","   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n","0         57.0          NaN       1016.8       1012.2       0.0       NaN   \n","1         59.0         31.0       1020.4       1017.5       NaN       NaN   \n","2         79.0         68.0       1020.3       1015.7       1.0       3.0   \n","3         82.0         44.0       1012.5       1005.9       6.0       6.0   \n","4         58.0         35.0       1019.8       1014.1       2.0       4.0   \n","\n","   Temp9am  Temp3pm  \n","0     27.5      NaN  \n","1     14.6     23.6  \n","2     17.5     20.8  \n","3     18.5     27.5  \n","4     12.4     22.4  \n","\n","[5 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":14}]},{"metadata":{"id":"o9huG5-AJGvw","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":119},"outputId":"8b2a76ab-75a0-4897-b63f-baf6ce2a1207","executionInfo":{"status":"ok","timestamp":1546762208530,"user_tz":-480,"elapsed":716,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Ytrain.head()"],"execution_count":15,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0     No\n","1     No\n","2     No\n","3    Yes\n","4     
No\n","Name: RainTomorrow, dtype: object"]},"metadata":{"tags":[]},"execution_count":15}]},{"metadata":{"id":"_GAQUGPZJGvz","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":68},"outputId":"b0131d36-b212-4cc0-eaa5-afff8fa53f28","executionInfo":{"status":"ok","timestamp":1546762212892,"user_tz":-480,"elapsed":692,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["#是否有样本不平衡问题？\n","Ytrain.value_counts()"],"execution_count":16,"outputs":[{"output_type":"execute_result","data":{"text/plain":["No     2704\n","Yes     796\n","Name: RainTomorrow, dtype: int64"]},"metadata":{"tags":[]},"execution_count":16}]},{"metadata":{"id":"z4EkFK6jJGv4","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":68},"outputId":"d781f993-40ee-4b15-ccb3-5881ad522c98","executionInfo":{"status":"ok","timestamp":1546762216087,"user_tz":-480,"elapsed":728,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Ytest.value_counts()"],"execution_count":17,"outputs":[{"output_type":"execute_result","data":{"text/plain":["No     1157\n","Yes     343\n","Name: RainTomorrow, dtype: int64"]},"metadata":{"tags":[]},"execution_count":17}]},{"metadata":{"id":"k_nd-232JGv7","colab_type":"code","colab":{}},"cell_type":"code","source":["#有轻微的样本不均衡问题"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Mkt1ckm6JGv9","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"39c8984a-8763-4379-c55b-7fb92d6e8b07","executionInfo":{"status":"ok","timestamp":1546762543274,"user_tz":-480,"elapsed":644,"user":{"displayName":"Sen 
Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["# Majority-to-minority class ratio: value_counts() sorts counts in descending order,\n","# so position 0 is the majority class. Use .iloc for positional access, because\n","# integer keys on a label-indexed Series are deprecated as positional lookups.\n","Ytrain.value_counts().iloc[0]/Ytrain.value_counts().iloc[1]"],"execution_count":18,"outputs":[{"output_type":"execute_result","data":{"text/plain":["3.3969849246231156"]},"metadata":{"tags":[]},"execution_count":18}]},{"metadata":{"id":"gWzzfvCoJGwA","colab_type":"code","colab":{}},"cell_type":"code","source":["# Encode the labels\n","from sklearn.preprocessing import LabelEncoder # label-only encoder (covered in chapter 3)\n","encorder = LabelEncoder().fit(Ytrain) # 1-D input is accepted here\n","# The encoder has now learned the two classes: No -> 0, Yes -> 1"],"execution_count":0,"outputs":[]},{"metadata":{"id":"p4TOwn2WJGwD","colab_type":"code","colab":{}},"cell_type":"code","source":["# Fit on the training set only, then transform the training and test sets separately\n","# NOTE: Ytrain/Ytest are overwritten below, so re-run the split cell before re-running this one\n","Ytrain = pd.DataFrame(encorder.transform(Ytrain))\n","Ytest = pd.DataFrame(encorder.transform(Ytest))\n","\n","# What if the test set contains a label class that never appeared in the training set?\n","# For example, the test set has YES, NO and UNKNOWN\n","# while the training set only has YES and NO"],"execution_count":0,"outputs":[]},{"metadata":{"id":"nG--XI7sJGwE","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":204},"outputId":"478fa980-366b-48bb-f5a5-99524edfdef3","executionInfo":{"status":"ok","timestamp":1546762622742,"user_tz":-480,"elapsed":1409,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Ytrain.head()"],"execution_count":21,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>0</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      
<td>0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   0\n","0  0\n","1  0\n","2  0\n","3  1\n","4  0"]},"metadata":{"tags":[]},"execution_count":21}]},{"metadata":{"id":"SODKCU3YJGwJ","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":204},"outputId":"e24c5e30-c9c7-4878-9839-a927e21c7429","executionInfo":{"status":"ok","timestamp":1546762640827,"user_tz":-480,"elapsed":731,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Ytest.head()"],"execution_count":22,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>0</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   0\n","0  0\n","1  0\n","2  1\n","3  0\n","4  
0"]},"metadata":{"tags":[]},"execution_count":22}]},{"metadata":{"id":"aUrKC6cKJGwP","colab_type":"code","colab":{}},"cell_type":"code","source":["Ytrain.to_csv(\"你想要保存这个文件的地址.文件名.csv\")"],"execution_count":0,"outputs":[]},{"metadata":{"id":"wzR8yyRwQDtn","colab_type":"text"},"cell_type":"markdown","source":["# 处理特征"]},{"metadata":{"id":"Zn7jIU5zRc-o","colab_type":"text"},"cell_type":"markdown","source":["## 1 描述性统计与异常值"]},{"metadata":{"id":"NDGtymU7JGwQ","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":565},"outputId":"e57b0580-0168-407f-dc0e-37af63b556a5","executionInfo":{"status":"ok","timestamp":1546763311614,"user_tz":-480,"elapsed":1496,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["#描述性统计\n","Xtrain.describe([0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.99]).T"],"execution_count":25,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>count</th>\n","      <th>mean</th>\n","      <th>std</th>\n","      <th>min</th>\n","      <th>1%</th>\n","      <th>5%</th>\n","      <th>10%</th>\n","      <th>25%</th>\n","      <th>50%</th>\n","      <th>75%</th>\n","      <th>90%</th>\n","      <th>99%</th>\n","      <th>max</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>MinTemp</th>\n","      <td>3486.0</td>\n","      <td>12.225645</td>\n","      <td>6.396243</td>\n","      <td>-6.5</td>\n","      <td>-1.715</td>\n","      <td>1.800</td>\n","      <td>4.1</td>\n","      <td>7.7</td>\n","      <td>12.0</td>\n","      <td>16.7</td>\n","      
<td>20.9</td>\n","      <td>25.900</td>\n","      <td>29.0</td>\n","    </tr>\n","    <tr>\n","      <th>MaxTemp</th>\n","      <td>3489.0</td>\n","      <td>23.245543</td>\n","      <td>7.201839</td>\n","      <td>-3.7</td>\n","      <td>8.888</td>\n","      <td>12.840</td>\n","      <td>14.5</td>\n","      <td>18.0</td>\n","      <td>22.5</td>\n","      <td>28.4</td>\n","      <td>33.0</td>\n","      <td>40.400</td>\n","      <td>46.4</td>\n","    </tr>\n","    <tr>\n","      <th>Rainfall</th>\n","      <td>3467.0</td>\n","      <td>2.487049</td>\n","      <td>7.949686</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.000</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>0.8</td>\n","      <td>6.6</td>\n","      <td>41.272</td>\n","      <td>115.8</td>\n","    </tr>\n","    <tr>\n","      <th>Evaporation</th>\n","      <td>1983.0</td>\n","      <td>5.619163</td>\n","      <td>4.383098</td>\n","      <td>0.0</td>\n","      <td>0.400</td>\n","      <td>0.800</td>\n","      <td>1.4</td>\n","      <td>2.6</td>\n","      <td>4.8</td>\n","      <td>7.4</td>\n","      <td>10.2</td>\n","      <td>20.600</td>\n","      <td>56.0</td>\n","    </tr>\n","    <tr>\n","      <th>Sunshine</th>\n","      <td>1790.0</td>\n","      <td>7.508659</td>\n","      <td>3.805841</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.345</td>\n","      <td>1.4</td>\n","      <td>4.6</td>\n","      <td>8.3</td>\n","      <td>10.6</td>\n","      <td>12.0</td>\n","      <td>13.300</td>\n","      <td>13.9</td>\n","    </tr>\n","    <tr>\n","      <th>WindGustSpeed</th>\n","      <td>3263.0</td>\n","      <td>39.858413</td>\n","      <td>13.219607</td>\n","      <td>9.0</td>\n","      <td>15.000</td>\n","      <td>20.000</td>\n","      <td>24.0</td>\n","      <td>31.0</td>\n","      <td>39.0</td>\n","      <td>48.0</td>\n","      <td>57.0</td>\n","      <td>76.000</td>\n","      <td>117.0</td>\n","    </tr>\n","    <tr>\n"," 
     <th>WindSpeed9am</th>\n","      <td>3466.0</td>\n","      <td>14.046163</td>\n","      <td>8.670472</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.000</td>\n","      <td>4.0</td>\n","      <td>7.0</td>\n","      <td>13.0</td>\n","      <td>19.0</td>\n","      <td>26.0</td>\n","      <td>37.000</td>\n","      <td>65.0</td>\n","    </tr>\n","    <tr>\n","      <th>WindSpeed3pm</th>\n","      <td>3437.0</td>\n","      <td>18.553390</td>\n","      <td>8.611818</td>\n","      <td>0.0</td>\n","      <td>2.000</td>\n","      <td>6.000</td>\n","      <td>7.0</td>\n","      <td>13.0</td>\n","      <td>19.0</td>\n","      <td>24.0</td>\n","      <td>30.0</td>\n","      <td>43.000</td>\n","      <td>65.0</td>\n","    </tr>\n","    <tr>\n","      <th>Humidity9am</th>\n","      <td>3459.0</td>\n","      <td>69.069095</td>\n","      <td>18.787698</td>\n","      <td>2.0</td>\n","      <td>18.000</td>\n","      <td>35.000</td>\n","      <td>45.0</td>\n","      <td>57.0</td>\n","      <td>70.0</td>\n","      <td>83.0</td>\n","      <td>94.0</td>\n","      <td>100.000</td>\n","      <td>100.0</td>\n","    </tr>\n","    <tr>\n","      <th>Humidity3pm</th>\n","      <td>3408.0</td>\n","      <td>51.651995</td>\n","      <td>20.697872</td>\n","      <td>2.0</td>\n","      <td>9.000</td>\n","      <td>17.000</td>\n","      <td>23.0</td>\n","      <td>37.0</td>\n","      <td>52.0</td>\n","      <td>66.0</td>\n","      <td>79.0</td>\n","      <td>98.000</td>\n","      <td>100.0</td>\n","    </tr>\n","    <tr>\n","      <th>Pressure9am</th>\n","      <td>3154.0</td>\n","      <td>1017.622067</td>\n","      <td>7.065236</td>\n","      <td>985.1</td>\n","      <td>1000.506</td>\n","      <td>1006.100</td>\n","      <td>1008.9</td>\n","      <td>1012.8</td>\n","      <td>1017.6</td>\n","      <td>1022.3</td>\n","      <td>1027.0</td>\n","      <td>1033.247</td>\n","      <td>1038.1</td>\n","    </tr>\n","    <tr>\n","      <th>Pressure3pm</th>\n","      
<td>3154.0</td>\n","      <td>1015.227077</td>\n","      <td>7.032531</td>\n","      <td>980.2</td>\n","      <td>998.000</td>\n","      <td>1004.000</td>\n","      <td>1006.5</td>\n","      <td>1010.3</td>\n","      <td>1015.2</td>\n","      <td>1020.0</td>\n","      <td>1024.4</td>\n","      <td>1030.800</td>\n","      <td>1036.0</td>\n","    </tr>\n","    <tr>\n","      <th>Cloud9am</th>\n","      <td>2171.0</td>\n","      <td>4.491939</td>\n","      <td>2.858781</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.000</td>\n","      <td>1.0</td>\n","      <td>1.0</td>\n","      <td>5.0</td>\n","      <td>7.0</td>\n","      <td>8.0</td>\n","      <td>8.000</td>\n","      <td>8.0</td>\n","    </tr>\n","    <tr>\n","      <th>Cloud3pm</th>\n","      <td>2095.0</td>\n","      <td>4.603819</td>\n","      <td>2.655765</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.000</td>\n","      <td>1.0</td>\n","      <td>2.0</td>\n","      <td>5.0</td>\n","      <td>7.0</td>\n","      <td>8.0</td>\n","      <td>8.000</td>\n","      <td>8.0</td>\n","    </tr>\n","    <tr>\n","      <th>Temp9am</th>\n","      <td>3481.0</td>\n","      <td>16.989859</td>\n","      <td>6.537552</td>\n","      <td>-5.2</td>\n","      <td>2.400</td>\n","      <td>7.000</td>\n","      <td>9.0</td>\n","      <td>12.2</td>\n","      <td>16.6</td>\n","      <td>21.6</td>\n","      <td>26.0</td>\n","      <td>31.000</td>\n","      <td>38.0</td>\n","    </tr>\n","    <tr>\n","      <th>Temp3pm</th>\n","      <td>3431.0</td>\n","      <td>21.719003</td>\n","      <td>7.031199</td>\n","      <td>-4.1</td>\n","      <td>7.460</td>\n","      <td>11.500</td>\n","      <td>13.3</td>\n","      <td>16.6</td>\n","      <td>21.0</td>\n","      <td>26.6</td>\n","      <td>31.4</td>\n","      <td>38.600</td>\n","      <td>45.9</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                count         mean        std    min        1%        5%  
\\\n","MinTemp        3486.0    12.225645   6.396243   -6.5    -1.715     1.800   \n","MaxTemp        3489.0    23.245543   7.201839   -3.7     8.888    12.840   \n","Rainfall       3467.0     2.487049   7.949686    0.0     0.000     0.000   \n","Evaporation    1983.0     5.619163   4.383098    0.0     0.400     0.800   \n","Sunshine       1790.0     7.508659   3.805841    0.0     0.000     0.345   \n","WindGustSpeed  3263.0    39.858413  13.219607    9.0    15.000    20.000   \n","WindSpeed9am   3466.0    14.046163   8.670472    0.0     0.000     0.000   \n","WindSpeed3pm   3437.0    18.553390   8.611818    0.0     2.000     6.000   \n","Humidity9am    3459.0    69.069095  18.787698    2.0    18.000    35.000   \n","Humidity3pm    3408.0    51.651995  20.697872    2.0     9.000    17.000   \n","Pressure9am    3154.0  1017.622067   7.065236  985.1  1000.506  1006.100   \n","Pressure3pm    3154.0  1015.227077   7.032531  980.2   998.000  1004.000   \n","Cloud9am       2171.0     4.491939   2.858781    0.0     0.000     0.000   \n","Cloud3pm       2095.0     4.603819   2.655765    0.0     0.000     0.000   \n","Temp9am        3481.0    16.989859   6.537552   -5.2     2.400     7.000   \n","Temp3pm        3431.0    21.719003   7.031199   -4.1     7.460    11.500   \n","\n","                  10%     25%     50%     75%     90%       99%     max  \n","MinTemp           4.1     7.7    12.0    16.7    20.9    25.900    29.0  \n","MaxTemp          14.5    18.0    22.5    28.4    33.0    40.400    46.4  \n","Rainfall          0.0     0.0     0.0     0.8     6.6    41.272   115.8  \n","Evaporation       1.4     2.6     4.8     7.4    10.2    20.600    56.0  \n","Sunshine          1.4     4.6     8.3    10.6    12.0    13.300    13.9  \n","WindGustSpeed    24.0    31.0    39.0    48.0    57.0    76.000   117.0  \n","WindSpeed9am      4.0     7.0    13.0    19.0    26.0    37.000    65.0  \n","WindSpeed3pm      7.0    13.0    19.0    24.0    30.0    43.000    65.0  
\n","Humidity9am      45.0    57.0    70.0    83.0    94.0   100.000   100.0  \n","Humidity3pm      23.0    37.0    52.0    66.0    79.0    98.000   100.0  \n","Pressure9am    1008.9  1012.8  1017.6  1022.3  1027.0  1033.247  1038.1  \n","Pressure3pm    1006.5  1010.3  1015.2  1020.0  1024.4  1030.800  1036.0  \n","Cloud9am          1.0     1.0     5.0     7.0     8.0     8.000     8.0  \n","Cloud3pm          1.0     2.0     5.0     7.0     8.0     8.000     8.0  \n","Temp9am           9.0    12.2    16.6    21.6    26.0    31.000    38.0  \n","Temp3pm          13.3    16.6    21.0    26.6    31.4    38.600    45.9  "]},"metadata":{"tags":[]},"execution_count":25}]},{"metadata":{"id":"T8q0RKluJGwT","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":565},"outputId":"caca1628-45a8-4c00-cff8-f93fbf6d97b5","executionInfo":{"status":"ok","timestamp":1546763318039,"user_tz":-480,"elapsed":1549,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtest.describe([0.01,0.05,0.1,0.25,0.5,0.75,0.9,0.99]).T"],"execution_count":26,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>count</th>\n","      <th>mean</th>\n","      <th>std</th>\n","      <th>min</th>\n","      <th>1%</th>\n","      <th>5%</th>\n","      <th>10%</th>\n","      <th>25%</th>\n","      <th>50%</th>\n","      <th>75%</th>\n","      <th>90%</th>\n","      <th>99%</th>\n","      <th>max</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>MinTemp</th>\n","      
<td>1493.0</td>\n","      <td>11.916812</td>\n","      <td>6.375377</td>\n","      <td>-8.5</td>\n","      <td>-2.024</td>\n","      <td>1.600</td>\n","      <td>3.70</td>\n","      <td>7.3</td>\n","      <td>11.8</td>\n","      <td>16.5</td>\n","      <td>20.48</td>\n","      <td>25.316</td>\n","      <td>28.3</td>\n","    </tr>\n","    <tr>\n","      <th>MaxTemp</th>\n","      <td>1498.0</td>\n","      <td>22.906809</td>\n","      <td>6.986043</td>\n","      <td>-0.8</td>\n","      <td>9.134</td>\n","      <td>13.000</td>\n","      <td>14.50</td>\n","      <td>17.8</td>\n","      <td>22.4</td>\n","      <td>27.8</td>\n","      <td>32.60</td>\n","      <td>38.303</td>\n","      <td>45.1</td>\n","    </tr>\n","    <tr>\n","      <th>Rainfall</th>\n","      <td>1483.0</td>\n","      <td>2.241807</td>\n","      <td>7.988822</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.000</td>\n","      <td>0.00</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>0.8</td>\n","      <td>5.20</td>\n","      <td>35.372</td>\n","      <td>108.2</td>\n","    </tr>\n","    <tr>\n","      <th>Evaporation</th>\n","      <td>858.0</td>\n","      <td>5.657809</td>\n","      <td>4.105762</td>\n","      <td>0.0</td>\n","      <td>0.400</td>\n","      <td>1.000</td>\n","      <td>1.60</td>\n","      <td>2.8</td>\n","      <td>4.8</td>\n","      <td>7.6</td>\n","      <td>10.40</td>\n","      <td>19.458</td>\n","      <td>38.8</td>\n","    </tr>\n","    <tr>\n","      <th>Sunshine</th>\n","      <td>781.0</td>\n","      <td>7.677465</td>\n","      <td>3.862294</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.300</td>\n","      <td>1.50</td>\n","      <td>4.7</td>\n","      <td>8.6</td>\n","      <td>10.7</td>\n","      <td>12.20</td>\n","      <td>13.400</td>\n","      <td>13.9</td>\n","    </tr>\n","    <tr>\n","      <th>WindGustSpeed</th>\n","      <td>1406.0</td>\n","      <td>40.044097</td>\n","      <td>14.027052</td>\n","      
<td>9.0</td>\n","      <td>15.000</td>\n","      <td>20.000</td>\n","      <td>24.00</td>\n","      <td>30.0</td>\n","      <td>39.0</td>\n","      <td>48.0</td>\n","      <td>57.00</td>\n","      <td>78.000</td>\n","      <td>122.0</td>\n","    </tr>\n","    <tr>\n","      <th>WindSpeed9am</th>\n","      <td>1483.0</td>\n","      <td>13.986514</td>\n","      <td>9.124337</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.000</td>\n","      <td>4.00</td>\n","      <td>7.0</td>\n","      <td>13.0</td>\n","      <td>20.0</td>\n","      <td>26.00</td>\n","      <td>39.360</td>\n","      <td>72.0</td>\n","    </tr>\n","    <tr>\n","      <th>WindSpeed3pm</th>\n","      <td>1482.0</td>\n","      <td>18.601215</td>\n","      <td>8.850446</td>\n","      <td>0.0</td>\n","      <td>2.000</td>\n","      <td>6.000</td>\n","      <td>7.00</td>\n","      <td>13.0</td>\n","      <td>19.0</td>\n","      <td>24.0</td>\n","      <td>31.00</td>\n","      <td>43.000</td>\n","      <td>56.0</td>\n","    </tr>\n","    <tr>\n","      <th>Humidity9am</th>\n","      <td>1477.0</td>\n","      <td>68.688558</td>\n","      <td>18.876448</td>\n","      <td>4.0</td>\n","      <td>20.000</td>\n","      <td>36.000</td>\n","      <td>44.00</td>\n","      <td>57.0</td>\n","      <td>69.0</td>\n","      <td>82.0</td>\n","      <td>95.00</td>\n","      <td>100.000</td>\n","      <td>100.0</td>\n","    </tr>\n","    <tr>\n","      <th>Humidity3pm</th>\n","      <td>1472.0</td>\n","      <td>51.431386</td>\n","      <td>20.459957</td>\n","      <td>2.0</td>\n","      <td>8.710</td>\n","      <td>18.000</td>\n","      <td>23.00</td>\n","      <td>37.0</td>\n","      <td>52.0</td>\n","      <td>66.0</td>\n","      <td>78.00</td>\n","      <td>96.290</td>\n","      <td>100.0</td>\n","    </tr>\n","    <tr>\n","      <th>Pressure9am</th>\n","      <td>1352.0</td>\n","      <td>1017.763536</td>\n","      <td>6.910275</td>\n","      <td>988.5</td>\n","      <td>1000.900</td>\n","      
<td>1006.255</td>\n","      <td>1008.61</td>\n","      <td>1013.2</td>\n","      <td>1017.8</td>\n","      <td>1022.3</td>\n","      <td>1026.50</td>\n","      <td>1033.449</td>\n","      <td>1038.2</td>\n","    </tr>\n","    <tr>\n","      <th>Pressure3pm</th>\n","      <td>1350.0</td>\n","      <td>1015.397926</td>\n","      <td>6.916976</td>\n","      <td>986.2</td>\n","      <td>999.198</td>\n","      <td>1003.900</td>\n","      <td>1006.49</td>\n","      <td>1010.9</td>\n","      <td>1015.4</td>\n","      <td>1020.0</td>\n","      <td>1024.20</td>\n","      <td>1031.151</td>\n","      <td>1036.9</td>\n","    </tr>\n","    <tr>\n","      <th>Cloud9am</th>\n","      <td>940.0</td>\n","      <td>4.494681</td>\n","      <td>2.870468</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.000</td>\n","      <td>1.00</td>\n","      <td>1.0</td>\n","      <td>5.0</td>\n","      <td>7.0</td>\n","      <td>8.00</td>\n","      <td>8.000</td>\n","      <td>8.0</td>\n","    </tr>\n","    <tr>\n","      <th>Cloud3pm</th>\n","      <td>917.0</td>\n","      <td>4.403490</td>\n","      <td>2.731969</td>\n","      <td>0.0</td>\n","      <td>0.000</td>\n","      <td>0.000</td>\n","      <td>1.00</td>\n","      <td>2.0</td>\n","      <td>5.0</td>\n","      <td>7.0</td>\n","      <td>8.00</td>\n","      <td>8.000</td>\n","      <td>8.0</td>\n","    </tr>\n","    <tr>\n","      <th>Temp9am</th>\n","      <td>1486.0</td>\n","      <td>16.751817</td>\n","      <td>6.339816</td>\n","      <td>-5.3</td>\n","      <td>2.370</td>\n","      <td>6.725</td>\n","      <td>9.00</td>\n","      <td>12.1</td>\n","      <td>16.5</td>\n","      <td>21.3</td>\n","      <td>25.45</td>\n","      <td>30.200</td>\n","      <td>35.1</td>\n","    </tr>\n","    <tr>\n","      <th>Temp3pm</th>\n","      <td>1481.0</td>\n","      <td>21.483660</td>\n","      <td>6.770567</td>\n","      <td>-1.2</td>\n","      <td>8.540</td>\n","      <td>11.800</td>\n","      <td>13.30</td>\n","      
<td>16.5</td>\n","      <td>20.9</td>\n","      <td>26.2</td>\n","      <td>30.90</td>\n","      <td>37.400</td>\n","      <td>42.9</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                count         mean        std    min        1%        5%  \\\n","MinTemp        1493.0    11.916812   6.375377   -8.5    -2.024     1.600   \n","MaxTemp        1498.0    22.906809   6.986043   -0.8     9.134    13.000   \n","Rainfall       1483.0     2.241807   7.988822    0.0     0.000     0.000   \n","Evaporation     858.0     5.657809   4.105762    0.0     0.400     1.000   \n","Sunshine        781.0     7.677465   3.862294    0.0     0.000     0.300   \n","WindGustSpeed  1406.0    40.044097  14.027052    9.0    15.000    20.000   \n","WindSpeed9am   1483.0    13.986514   9.124337    0.0     0.000     0.000   \n","WindSpeed3pm   1482.0    18.601215   8.850446    0.0     2.000     6.000   \n","Humidity9am    1477.0    68.688558  18.876448    4.0    20.000    36.000   \n","Humidity3pm    1472.0    51.431386  20.459957    2.0     8.710    18.000   \n","Pressure9am    1352.0  1017.763536   6.910275  988.5  1000.900  1006.255   \n","Pressure3pm    1350.0  1015.397926   6.916976  986.2   999.198  1003.900   \n","Cloud9am        940.0     4.494681   2.870468    0.0     0.000     0.000   \n","Cloud3pm        917.0     4.403490   2.731969    0.0     0.000     0.000   \n","Temp9am        1486.0    16.751817   6.339816   -5.3     2.370     6.725   \n","Temp3pm        1481.0    21.483660   6.770567   -1.2     8.540    11.800   \n","\n","                   10%     25%     50%     75%      90%       99%     max  \n","MinTemp           3.70     7.3    11.8    16.5    20.48    25.316    28.3  \n","MaxTemp          14.50    17.8    22.4    27.8    32.60    38.303    45.1  \n","Rainfall          0.00     0.0     0.0     0.8     5.20    35.372   108.2  \n","Evaporation       1.60     2.8     4.8     7.6    10.40    19.458    38.8  \n","Sunshine          1.50     
4.7     8.6    10.7    12.20    13.400    13.9  \n","WindGustSpeed    24.00    30.0    39.0    48.0    57.00    78.000   122.0  \n","WindSpeed9am      4.00     7.0    13.0    20.0    26.00    39.360    72.0  \n","WindSpeed3pm      7.00    13.0    19.0    24.0    31.00    43.000    56.0  \n","Humidity9am      44.00    57.0    69.0    82.0    95.00   100.000   100.0  \n","Humidity3pm      23.00    37.0    52.0    66.0    78.00    96.290   100.0  \n","Pressure9am    1008.61  1013.2  1017.8  1022.3  1026.50  1033.449  1038.2  \n","Pressure3pm    1006.49  1010.9  1015.4  1020.0  1024.20  1031.151  1036.9  \n","Cloud9am          1.00     1.0     5.0     7.0     8.00     8.000     8.0  \n","Cloud3pm          1.00     2.0     5.0     7.0     8.00     8.000     8.0  \n","Temp9am           9.00    12.1    16.5    21.3    25.45    30.200    35.1  \n","Temp3pm          13.30    16.5    20.9    26.2    30.90    37.400    42.9  "]},"metadata":{"tags":[]},"execution_count":26}]},{"metadata":{"id":"Hv0RUT0NJGwe","colab_type":"code","colab":{}},"cell_type":"code","source":["#对于去kaggle上下载了数据的小伙伴们，以及对于坚持要使用完整版数据的（15W行）数据的小伙伴们\n","#如果你发现了异常值，首先你要观察，这个异常值出现的频率\n","#如果异常值只出现了一次，多半是输入错误，直接把异常值删除\n","#如果异常值出现了多次，去跟业务人员沟通，人为造成的错误异常值留着是没有用的\n","#如果异常值占到你总数据量的10%左右了 - 把异常值替换成非异常但是非干扰的项，比如说用0来进行替换，或者把异常当缺失"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Ai-ChUTyJGwg","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":338},"outputId":"f8e0bb4b-93d3-4df0-e21f-cf46c19541bd","executionInfo":{"status":"ok","timestamp":1546763324928,"user_tz":-480,"elapsed":946,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.head()"],"execution_count":27,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    
}\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Date</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed9am</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>2015-08-24</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>2016-12-10</td>\n","      <td>Tuggeranong</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>7.0</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","    </tr>\n","    <tr>\n","     
 <th>2</th>\n","      <td>2010-04-18</td>\n","      <td>Albany</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>2009-11-26</td>\n","      <td>Sale</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>11.0</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>2014-04-25</td>\n","      <td>Mildura</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 21 columns</p>\n","</div>"],"text/plain":["         Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0  2015-08-24    Katherine     17.5     36.0       0.0          8.8       NaN   \n","1  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n","2  2010-04-18       
Albany     13.0     22.6       0.0          3.8      10.4   \n","3  2009-11-26         Sale     13.9     29.8       0.0          5.8       5.1   \n","4  2014-04-25      Mildura      6.0     23.5       0.0          2.8       8.6   \n","\n","  WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  WindSpeed3pm  \\\n","0         ESE           26.0        NNW   ...            17.0          15.0   \n","1         NNW           33.0         NE   ...             7.0          17.0   \n","2         NaN            NaN         NE   ...            17.0          31.0   \n","3           S           37.0          N   ...            11.0          28.0   \n","4         NNE           24.0          E   ...            15.0          15.0   \n","\n","   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n","0         57.0          NaN       1016.8       1012.2       0.0       NaN   \n","1         59.0         31.0       1020.4       1017.5       NaN       NaN   \n","2         79.0         68.0       1020.3       1015.7       1.0       3.0   \n","3         82.0         44.0       1012.5       1005.9       6.0       6.0   \n","4         58.0         35.0       1019.8       1014.1       2.0       4.0   \n","\n","   Temp9am  Temp3pm  \n","0     27.5      NaN  \n","1     14.6     23.6  \n","2     17.5     20.8  \n","3     18.5     27.5  \n","4     12.4     22.4  \n","\n","[5 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":27}]},{"metadata":{"id":"neOw3L7ESyoO","colab_type":"text"},"cell_type":"markdown","source":["## 2 处理困难特征：日期"]},{"metadata":{"id":"nExwVruSJGwm","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"cd14119d-d3e8-4e9f-8938-85cb3e5cfe23","executionInfo":{"status":"ok","timestamp":1546763790779,"user_tz":-480,"elapsed":670,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["type(Xtrain.iloc[0,0]) 
#字符串"],"execution_count":28,"outputs":[{"output_type":"execute_result","data":{"text/plain":["str"]},"metadata":{"tags":[]},"execution_count":28}]},{"metadata":{"id":"MixTock-JGw5","colab_type":"code","colab":{}},"cell_type":"code","source":["#我们现在拥有的日期特征，是连续型特征，还是分类型特征\n","#2019-1-6\n","#2019-1-6.5\n","#日期是一年分了365类的分类型变量\n","#我们的日期特征中，日期是否有重复"],"execution_count":0,"outputs":[]},{"metadata":{"id":"QKScUaIDJGw9","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":1071},"outputId":"f98393e3-cb6c-4bb4-fb6b-9f2b3e5663e0","executionInfo":{"status":"ok","timestamp":1546763975899,"user_tz":-480,"elapsed":1719,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.iloc[:,0].value_counts()"],"execution_count":29,"outputs":[{"output_type":"execute_result","data":{"text/plain":["2014-05-16    6\n","2015-10-12    6\n","2015-07-03    6\n","2011-09-04    5\n","2014-07-26    5\n","2016-09-07    5\n","2012-09-18    5\n","2017-01-09    5\n","2009-03-30    5\n","2014-02-12    5\n","2011-07-19    5\n","2010-11-03    5\n","2009-07-17    5\n","2014-06-16    5\n","2013-12-20    5\n","2010-05-18    5\n","2014-03-12    5\n","2011-11-04    5\n","2016-11-01    5\n","2012-11-23    5\n","2012-07-18    5\n","2009-06-29    5\n","2012-05-02    4\n","2014-01-02    4\n","2010-12-11    4\n","2012-04-28    4\n","2012-09-23    4\n","2014-04-22    4\n","2016-09-14    4\n","2010-12-29    4\n","             ..\n","2008-10-30    1\n","2009-07-03    1\n","2013-12-12    1\n","2016-08-23    1\n","2015-08-10    1\n","2014-08-16    1\n","2009-10-13    1\n","2012-04-29    1\n","2014-04-21    1\n","2016-02-21    1\n","2017-01-28    1\n","2010-05-04    1\n","2015-08-03    1\n","2015-01-31    1\n","2009-05-27    1\n","2014-02-15    1\n","2011-06-28    1\n","2012-10-03    1\n","2015-10-02    1\n","2011-09-13    1\n","2009-02-22    1\n","2014-06-13    1\n","2011-08-28    1\n","2015-11-05    1\n","2012-07-17    1\n","2015-02-03    
1\n","2012-08-07    1\n","2008-10-27    1\n","2015-07-11    1\n","2011-11-24    1\n","Name: Date, Length: 2141, dtype: int64"]},"metadata":{"tags":[]},"execution_count":29}]},{"metadata":{"id":"Frmlq99WJGwu","colab_type":"code","colab":{}},"cell_type":"code","source":["Xtrainc = Xtrain.copy()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"isbtbXzeJGwy","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":3009},"outputId":"8016f3e7-42fb-4d13-9a38-a348bc22a254","executionInfo":{"status":"ok","timestamp":1546764334031,"user_tz":-480,"elapsed":748,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrainc.sort_values(by=\"Location\")"],"execution_count":32,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Date</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed9am</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>2796</th>\n","      <td>2015-03-24</td>\n","      <td>Adelaide</td>\n","      <td>12.3</td>\n"," 
     <td>19.3</td>\n","      <td>0.0</td>\n","      <td>5.0</td>\n","      <td>NaN</td>\n","      <td>S</td>\n","      <td>39.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>13.0</td>\n","      <td>19.0</td>\n","      <td>59.0</td>\n","      <td>47.0</td>\n","      <td>1022.2</td>\n","      <td>1021.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>15.1</td>\n","      <td>17.7</td>\n","    </tr>\n","    <tr>\n","      <th>2975</th>\n","      <td>2012-08-17</td>\n","      <td>Adelaide</td>\n","      <td>7.8</td>\n","      <td>13.2</td>\n","      <td>17.6</td>\n","      <td>0.8</td>\n","      <td>NaN</td>\n","      <td>SW</td>\n","      <td>61.0</td>\n","      <td>SW</td>\n","      <td>...</td>\n","      <td>20.0</td>\n","      <td>28.0</td>\n","      <td>76.0</td>\n","      <td>47.0</td>\n","      <td>1012.5</td>\n","      <td>1014.7</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>8.3</td>\n","      <td>12.5</td>\n","    </tr>\n","    <tr>\n","      <th>775</th>\n","      <td>2013-03-16</td>\n","      <td>Adelaide</td>\n","      <td>17.4</td>\n","      <td>23.8</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>9.7</td>\n","      <td>SSE</td>\n","      <td>46.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>9.0</td>\n","      <td>19.0</td>\n","      <td>63.0</td>\n","      <td>57.0</td>\n","      <td>1019.9</td>\n","      <td>1020.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>19.1</td>\n","      <td>20.7</td>\n","    </tr>\n","    <tr>\n","      <th>861</th>\n","      <td>2011-07-12</td>\n","      <td>Adelaide</td>\n","      <td>7.9</td>\n","      <td>11.4</td>\n","      <td>0.0</td>\n","      <td>1.0</td>\n","      <td>0.5</td>\n","      <td>N</td>\n","      <td>20.0</td>\n","      <td>NNE</td>\n","      <td>...</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>70.0</td>\n","      <td>59.0</td>\n","      <td>1028.7</td>\n","      <td>1025.7</td>\n"," 
     <td>NaN</td>\n","      <td>NaN</td>\n","      <td>8.4</td>\n","      <td>11.3</td>\n","    </tr>\n","    <tr>\n","      <th>2906</th>\n","      <td>2015-08-24</td>\n","      <td>Adelaide</td>\n","      <td>9.2</td>\n","      <td>14.3</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SE</td>\n","      <td>48.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>19.0</td>\n","      <td>64.0</td>\n","      <td>42.0</td>\n","      <td>1024.7</td>\n","      <td>1024.1</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>9.9</td>\n","      <td>13.4</td>\n","    </tr>\n","    <tr>\n","      <th>2900</th>\n","      <td>2009-09-17</td>\n","      <td>Adelaide</td>\n","      <td>14.2</td>\n","      <td>17.4</td>\n","      <td>8.8</td>\n","      <td>2.0</td>\n","      <td>7.1</td>\n","      <td>SW</td>\n","      <td>41.0</td>\n","      <td>SSW</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>20.0</td>\n","      <td>82.0</td>\n","      <td>56.0</td>\n","      <td>1014.9</td>\n","      <td>1018.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>16.2</td>\n","      <td>16.7</td>\n","    </tr>\n","    <tr>\n","      <th>902</th>\n","      <td>2008-12-10</td>\n","      <td>Adelaide</td>\n","      <td>14.2</td>\n","      <td>28.0</td>\n","      <td>0.0</td>\n","      <td>6.4</td>\n","      <td>12.5</td>\n","      <td>SE</td>\n","      <td>48.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>13.0</td>\n","      <td>13.0</td>\n","      <td>56.0</td>\n","      <td>35.0</td>\n","      <td>1010.8</td>\n","      <td>1008.9</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>19.3</td>\n","      <td>25.6</td>\n","    </tr>\n","    <tr>\n","      <th>919</th>\n","      <td>2011-10-12</td>\n","      <td>Adelaide</td>\n","      <td>7.7</td>\n","      <td>19.9</td>\n","      <td>0.0</td>\n","      <td>3.4</td>\n","      <td>11.4</td>\n","      
<td>W</td>\n","      <td>30.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>13.0</td>\n","      <td>15.0</td>\n","      <td>56.0</td>\n","      <td>43.0</td>\n","      <td>1021.2</td>\n","      <td>1018.1</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.1</td>\n","      <td>19.4</td>\n","    </tr>\n","    <tr>\n","      <th>936</th>\n","      <td>2014-03-11</td>\n","      <td>Adelaide</td>\n","      <td>22.3</td>\n","      <td>32.2</td>\n","      <td>0.4</td>\n","      <td>20.6</td>\n","      <td>3.2</td>\n","      <td>W</td>\n","      <td>65.0</td>\n","      <td>ESE</td>\n","      <td>...</td>\n","      <td>9.0</td>\n","      <td>9.0</td>\n","      <td>53.0</td>\n","      <td>78.0</td>\n","      <td>1017.0</td>\n","      <td>1017.2</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>25.6</td>\n","      <td>22.8</td>\n","    </tr>\n","    <tr>\n","      <th>942</th>\n","      <td>2015-02-23</td>\n","      <td>Adelaide</td>\n","      <td>20.6</td>\n","      <td>26.5</td>\n","      <td>0.0</td>\n","      <td>16.2</td>\n","      <td>NaN</td>\n","      <td>SSE</td>\n","      <td>48.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>20.0</td>\n","      <td>22.0</td>\n","      <td>61.0</td>\n","      <td>43.0</td>\n","      <td>1015.3</td>\n","      <td>1016.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>22.2</td>\n","      <td>25.7</td>\n","    </tr>\n","    <tr>\n","      <th>999</th>\n","      <td>2009-11-30</td>\n","      <td>Adelaide</td>\n","      <td>13.1</td>\n","      <td>22.4</td>\n","      <td>0.6</td>\n","      <td>13.6</td>\n","      <td>10.4</td>\n","      <td>SSE</td>\n","      <td>37.0</td>\n","      <td>SSE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>15.0</td>\n","      <td>66.0</td>\n","      <td>43.0</td>\n","      <td>1017.9</td>\n","      <td>1016.8</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>16.1</td>\n","      
<td>20.9</td>\n","    </tr>\n","    <tr>\n","      <th>1013</th>\n","      <td>2012-08-23</td>\n","      <td>Adelaide</td>\n","      <td>8.1</td>\n","      <td>12.4</td>\n","      <td>6.0</td>\n","      <td>1.8</td>\n","      <td>NaN</td>\n","      <td>WNW</td>\n","      <td>74.0</td>\n","      <td>NW</td>\n","      <td>...</td>\n","      <td>26.0</td>\n","      <td>37.0</td>\n","      <td>74.0</td>\n","      <td>78.0</td>\n","      <td>1002.6</td>\n","      <td>1005.6</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>10.7</td>\n","      <td>7.4</td>\n","    </tr>\n","    <tr>\n","      <th>1042</th>\n","      <td>2008-11-03</td>\n","      <td>Adelaide</td>\n","      <td>13.3</td>\n","      <td>21.2</td>\n","      <td>0.0</td>\n","      <td>15.2</td>\n","      <td>10.0</td>\n","      <td>SSE</td>\n","      <td>39.0</td>\n","      <td>SSW</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>20.0</td>\n","      <td>50.0</td>\n","      <td>39.0</td>\n","      <td>1021.9</td>\n","      <td>1020.1</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.8</td>\n","      <td>19.9</td>\n","    </tr>\n","    <tr>\n","      <th>701</th>\n","      <td>2009-01-08</td>\n","      <td>Adelaide</td>\n","      <td>14.3</td>\n","      <td>23.8</td>\n","      <td>0.0</td>\n","      <td>7.4</td>\n","      <td>12.7</td>\n","      <td>SE</td>\n","      <td>37.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>22.0</td>\n","      <td>45.0</td>\n","      <td>30.0</td>\n","      <td>1019.9</td>\n","      <td>1019.2</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>17.6</td>\n","      <td>23.2</td>\n","    </tr>\n","    <tr>\n","      <th>109</th>\n","      <td>2012-05-02</td>\n","      <td>Adelaide</td>\n","      <td>9.4</td>\n","      <td>16.5</td>\n","      <td>12.4</td>\n","      <td>0.8</td>\n","      <td>NaN</td>\n","      <td>SSE</td>\n","      <td>39.0</td>\n","      <td>S</td>\n","      
<td>...</td>\n","      <td>9.0</td>\n","      <td>15.0</td>\n","      <td>53.0</td>\n","      <td>47.0</td>\n","      <td>1029.6</td>\n","      <td>1028.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>13.5</td>\n","      <td>14.6</td>\n","    </tr>\n","    <tr>\n","      <th>227</th>\n","      <td>2016-08-31</td>\n","      <td>Adelaide</td>\n","      <td>11.9</td>\n","      <td>16.8</td>\n","      <td>1.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SW</td>\n","      <td>28.0</td>\n","      <td>WNW</td>\n","      <td>...</td>\n","      <td>11.0</td>\n","      <td>13.0</td>\n","      <td>80.0</td>\n","      <td>79.0</td>\n","      <td>1018.5</td>\n","      <td>1017.7</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.3</td>\n","      <td>15.2</td>\n","    </tr>\n","    <tr>\n","      <th>1190</th>\n","      <td>2015-07-01</td>\n","      <td>Adelaide</td>\n","      <td>4.7</td>\n","      <td>14.5</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>WSW</td>\n","      <td>20.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>6.0</td>\n","      <td>13.0</td>\n","      <td>78.0</td>\n","      <td>48.0</td>\n","      <td>1030.6</td>\n","      <td>1027.6</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>8.8</td>\n","      <td>13.1</td>\n","    </tr>\n","    <tr>\n","      <th>2701</th>\n","      <td>2013-05-16</td>\n","      <td>Adelaide</td>\n","      <td>10.7</td>\n","      <td>17.5</td>\n","      <td>7.0</td>\n","      <td>1.4</td>\n","      <td>7.1</td>\n","      <td>SW</td>\n","      <td>35.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>6.0</td>\n","      <td>19.0</td>\n","      <td>88.0</td>\n","      <td>54.0</td>\n","      <td>1016.3</td>\n","      <td>1015.9</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>13.1</td>\n","      <td>16.9</td>\n","    </tr>\n","    <tr>\n","      <th>1227</th>\n","      
<td>2016-06-22</td>\n","      <td>Adelaide</td>\n","      <td>10.2</td>\n","      <td>16.5</td>\n","      <td>11.8</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>WNW</td>\n","      <td>39.0</td>\n","      <td>W</td>\n","      <td>...</td>\n","      <td>9.0</td>\n","      <td>17.0</td>\n","      <td>67.0</td>\n","      <td>63.0</td>\n","      <td>1010.3</td>\n","      <td>1010.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>13.5</td>\n","      <td>15.5</td>\n","    </tr>\n","    <tr>\n","      <th>1234</th>\n","      <td>2017-01-09</td>\n","      <td>Adelaide</td>\n","      <td>20.2</td>\n","      <td>30.4</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SW</td>\n","      <td>24.0</td>\n","      <td>ESE</td>\n","      <td>...</td>\n","      <td>6.0</td>\n","      <td>9.0</td>\n","      <td>70.0</td>\n","      <td>38.0</td>\n","      <td>1012.7</td>\n","      <td>1011.3</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>20.9</td>\n","      <td>28.9</td>\n","    </tr>\n","    <tr>\n","      <th>1266</th>\n","      <td>2008-11-07</td>\n","      <td>Adelaide</td>\n","      <td>18.3</td>\n","      <td>22.5</td>\n","      <td>0.2</td>\n","      <td>8.0</td>\n","      <td>1.4</td>\n","      <td>WNW</td>\n","      <td>56.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>28.0</td>\n","      <td>58.0</td>\n","      <td>51.0</td>\n","      <td>1001.0</td>\n","      <td>1004.7</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>22.4</td>\n","      <td>18.4</td>\n","    </tr>\n","    <tr>\n","      <th>1282</th>\n","      <td>2012-09-03</td>\n","      <td>Adelaide</td>\n","      <td>7.7</td>\n","      <td>22.9</td>\n","      <td>0.0</td>\n","      <td>7.0</td>\n","      <td>NaN</td>\n","      <td>SE</td>\n","      <td>56.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>13.0</td>\n","      <td>17.0</td>\n","      
<td>27.0</td>\n","      <td>22.0</td>\n","      <td>1021.1</td>\n","      <td>1018.2</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>17.5</td>\n","      <td>22.1</td>\n","    </tr>\n","    <tr>\n","      <th>1289</th>\n","      <td>2010-03-09</td>\n","      <td>Adelaide</td>\n","      <td>14.3</td>\n","      <td>22.0</td>\n","      <td>1.6</td>\n","      <td>16.4</td>\n","      <td>7.8</td>\n","      <td>SW</td>\n","      <td>39.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>11.0</td>\n","      <td>20.0</td>\n","      <td>75.0</td>\n","      <td>36.0</td>\n","      <td>1021.2</td>\n","      <td>1022.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>16.8</td>\n","      <td>21.6</td>\n","    </tr>\n","    <tr>\n","      <th>86</th>\n","      <td>2010-11-08</td>\n","      <td>Adelaide</td>\n","      <td>13.7</td>\n","      <td>23.1</td>\n","      <td>0.0</td>\n","      <td>16.0</td>\n","      <td>12.2</td>\n","      <td>W</td>\n","      <td>31.0</td>\n","      <td>WNW</td>\n","      <td>...</td>\n","      <td>13.0</td>\n","      <td>17.0</td>\n","      <td>61.0</td>\n","      <td>38.0</td>\n","      <td>1016.0</td>\n","      <td>1016.3</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>17.2</td>\n","      <td>22.5</td>\n","    </tr>\n","    <tr>\n","      <th>83</th>\n","      <td>2012-10-08</td>\n","      <td>Adelaide</td>\n","      <td>10.2</td>\n","      <td>23.6</td>\n","      <td>0.2</td>\n","      <td>10.0</td>\n","      <td>NaN</td>\n","      <td>E</td>\n","      <td>33.0</td>\n","      <td>NNE</td>\n","      <td>...</td>\n","      <td>11.0</td>\n","      <td>17.0</td>\n","      <td>46.0</td>\n","      <td>28.0</td>\n","      <td>1016.7</td>\n","      <td>1012.9</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>15.1</td>\n","      <td>22.6</td>\n","    </tr>\n","    <tr>\n","      <th>1356</th>\n","      <td>2014-03-05</td>\n","      <td>Adelaide</td>\n","      <td>16.7</td>\n","   
   <td>24.8</td>\n","      <td>0.0</td>\n","      <td>6.6</td>\n","      <td>11.7</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>24.0</td>\n","      <td>61.0</td>\n","      <td>48.0</td>\n","      <td>1019.3</td>\n","      <td>1018.9</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>20.8</td>\n","      <td>23.7</td>\n","    </tr>\n","    <tr>\n","      <th>1390</th>\n","      <td>2015-10-04</td>\n","      <td>Adelaide</td>\n","      <td>15.4</td>\n","      <td>29.7</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>WNW</td>\n","      <td>20.0</td>\n","      <td>NaN</td>\n","      <td>...</td>\n","      <td>0.0</td>\n","      <td>9.0</td>\n","      <td>44.0</td>\n","      <td>22.0</td>\n","      <td>1024.0</td>\n","      <td>1022.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>18.3</td>\n","      <td>27.4</td>\n","    </tr>\n","    <tr>\n","      <th>107</th>\n","      <td>2015-10-01</td>\n","      <td>Adelaide</td>\n","      <td>7.8</td>\n","      <td>24.4</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>WSW</td>\n","      <td>22.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>9.0</td>\n","      <td>15.0</td>\n","      <td>35.0</td>\n","      <td>23.0</td>\n","      <td>1028.4</td>\n","      <td>1028.6</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>19.1</td>\n","      <td>23.8</td>\n","    </tr>\n","    <tr>\n","      <th>3403</th>\n","      <td>2015-12-05</td>\n","      <td>Adelaide</td>\n","      <td>20.7</td>\n","      <td>40.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SE</td>\n","      <td>31.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>9.0</td>\n","      <td>13.0</td>\n","      <td>18.0</td>\n","      <td>13.0</td>\n","      <td>1015.3</td>\n","      
<td>1012.1</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>33.4</td>\n","      <td>38.6</td>\n","    </tr>\n","    <tr>\n","      <th>3023</th>\n","      <td>2012-11-06</td>\n","      <td>Adelaide</td>\n","      <td>14.7</td>\n","      <td>22.8</td>\n","      <td>8.0</td>\n","      <td>3.2</td>\n","      <td>NaN</td>\n","      <td>NW</td>\n","      <td>31.0</td>\n","      <td>W</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>15.0</td>\n","      <td>64.0</td>\n","      <td>46.0</td>\n","      <td>1011.9</td>\n","      <td>1011.6</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>17.7</td>\n","      <td>21.0</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>179</th>\n","      <td>2015-12-20</td>\n","      <td>Woomera</td>\n","      <td>27.1</td>\n","      <td>31.3</td>\n","      <td>0.4</td>\n","      <td>18.0</td>\n","      <td>2.1</td>\n","      <td>SE</td>\n","      <td>56.0</td>\n","      <td>ENE</td>\n","      <td>...</td>\n","      <td>11.0</td>\n","      <td>35.0</td>\n","      <td>37.0</td>\n","      <td>41.0</td>\n","      <td>1007.7</td>\n","      <td>1007.7</td>\n","      <td>1.0</td>\n","      <td>NaN</td>\n","      <td>30.4</td>\n","      <td>29.7</td>\n","    </tr>\n","    <tr>\n","      <th>212</th>\n","      <td>2009-06-25</td>\n","      <td>Woomera</td>\n","      <td>9.1</td>\n","      <td>18.4</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>4.4</td>\n","      
<td>N</td>\n","      <td>41.0</td>\n","      <td>NNE</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>20.0</td>\n","      <td>54.0</td>\n","      <td>90.0</td>\n","      <td>1012.9</td>\n","      <td>1008.8</td>\n","      <td>6.0</td>\n","      <td>8.0</td>\n","      <td>13.2</td>\n","      <td>14.2</td>\n","    </tr>\n","    <tr>\n","      <th>1392</th>\n","      <td>2015-11-06</td>\n","      <td>Woomera</td>\n","      <td>13.1</td>\n","      <td>28.3</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SE</td>\n","      <td>33.0</td>\n","      <td>SSE</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>7.0</td>\n","      <td>65.0</td>\n","      <td>36.0</td>\n","      <td>1015.9</td>\n","      <td>1013.2</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>17.6</td>\n","      <td>26.0</td>\n","    </tr>\n","    <tr>\n","      <th>1436</th>\n","      <td>2009-10-19</td>\n","      <td>Woomera</td>\n","      <td>12.6</td>\n","      <td>30.0</td>\n","      <td>0.0</td>\n","      <td>7.8</td>\n","      <td>12.3</td>\n","      <td>N</td>\n","      <td>44.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>13.0</td>\n","      <td>22.0</td>\n","      <td>20.0</td>\n","      <td>8.0</td>\n","      <td>1025.5</td>\n","      <td>1021.3</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>19.9</td>\n","      <td>28.7</td>\n","    </tr>\n","    <tr>\n","      <th>1431</th>\n","      <td>2012-01-16</td>\n","      <td>Woomera</td>\n","      <td>18.9</td>\n","      <td>38.8</td>\n","      <td>0.0</td>\n","      <td>12.0</td>\n","      <td>12.4</td>\n","      <td>N</td>\n","      <td>41.0</td>\n","      <td>ENE</td>\n","      <td>...</td>\n","      <td>20.0</td>\n","      <td>20.0</td>\n","      <td>10.0</td>\n","      <td>5.0</td>\n","      <td>1012.0</td>\n","      <td>1008.7</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>27.6</td>\n","      
<td>37.9</td>\n","    </tr>\n","    <tr>\n","      <th>204</th>\n","      <td>2014-02-05</td>\n","      <td>Woomera</td>\n","      <td>23.1</td>\n","      <td>38.6</td>\n","      <td>0.0</td>\n","      <td>19.8</td>\n","      <td>6.3</td>\n","      <td>SE</td>\n","      <td>37.0</td>\n","      <td>ESE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>6.0</td>\n","      <td>18.0</td>\n","      <td>11.0</td>\n","      <td>1012.1</td>\n","      <td>1009.9</td>\n","      <td>3.0</td>\n","      <td>2.0</td>\n","      <td>26.8</td>\n","      <td>36.2</td>\n","    </tr>\n","    <tr>\n","      <th>225</th>\n","      <td>2013-08-23</td>\n","      <td>Woomera</td>\n","      <td>6.5</td>\n","      <td>17.7</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>3.6</td>\n","      <td>SW</td>\n","      <td>43.0</td>\n","      <td>WSW</td>\n","      <td>...</td>\n","      <td>22.0</td>\n","      <td>26.0</td>\n","      <td>76.0</td>\n","      <td>46.0</td>\n","      <td>1024.2</td>\n","      <td>1022.9</td>\n","      <td>1.0</td>\n","      <td>7.0</td>\n","      <td>13.2</td>\n","      <td>17.3</td>\n","    </tr>\n","    <tr>\n","      <th>400</th>\n","      <td>2016-02-09</td>\n","      <td>Woomera</td>\n","      <td>20.2</td>\n","      <td>39.4</td>\n","      <td>0.0</td>\n","      <td>17.6</td>\n","      <td>12.8</td>\n","      <td>SSE</td>\n","      <td>43.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>24.0</td>\n","      <td>9.0</td>\n","      <td>50.0</td>\n","      <td>12.0</td>\n","      <td>1016.8</td>\n","      <td>1013.3</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>24.0</td>\n","      <td>37.7</td>\n","    </tr>\n","    <tr>\n","      <th>1174</th>\n","      <td>2011-07-27</td>\n","      <td>Woomera</td>\n","      <td>4.9</td>\n","      <td>20.9</td>\n","      <td>0.0</td>\n","      <td>3.2</td>\n","      <td>10.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      
<td>...</td>\n","      <td>0.0</td>\n","      <td>11.0</td>\n","      <td>54.0</td>\n","      <td>29.0</td>\n","      <td>1026.8</td>\n","      <td>1024.0</td>\n","      <td>2.0</td>\n","      <td>0.0</td>\n","      <td>11.7</td>\n","      <td>20.3</td>\n","    </tr>\n","    <tr>\n","      <th>3413</th>\n","      <td>2009-03-27</td>\n","      <td>Woomera</td>\n","      <td>15.2</td>\n","      <td>30.0</td>\n","      <td>0.0</td>\n","      <td>8.5</td>\n","      <td>11.2</td>\n","      <td>SSE</td>\n","      <td>43.0</td>\n","      <td>SSE</td>\n","      <td>...</td>\n","      <td>20.0</td>\n","      <td>17.0</td>\n","      <td>52.0</td>\n","      <td>20.0</td>\n","      <td>1024.9</td>\n","      <td>1021.6</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>16.9</td>\n","      <td>28.3</td>\n","    </tr>\n","    <tr>\n","      <th>396</th>\n","      <td>2013-01-01</td>\n","      <td>Woomera</td>\n","      <td>21.5</td>\n","      <td>40.4</td>\n","      <td>0.0</td>\n","      <td>42.2</td>\n","      <td>NaN</td>\n","      <td>SE</td>\n","      <td>54.0</td>\n","      <td>ESE</td>\n","      <td>...</td>\n","      <td>20.0</td>\n","      <td>26.0</td>\n","      <td>17.0</td>\n","      <td>3.0</td>\n","      <td>1014.5</td>\n","      <td>1012.1</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>28.1</td>\n","      <td>39.6</td>\n","    </tr>\n","    <tr>\n","      <th>2867</th>\n","      <td>2009-10-07</td>\n","      <td>Woomera</td>\n","      <td>9.0</td>\n","      <td>21.5</td>\n","      <td>0.0</td>\n","      <td>7.8</td>\n","      <td>11.9</td>\n","      <td>SE</td>\n","      <td>35.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>22.0</td>\n","      <td>15.0</td>\n","      <td>52.0</td>\n","      <td>20.0</td>\n","      <td>1025.6</td>\n","      <td>1024.3</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>12.4</td>\n","      <td>19.9</td>\n","    </tr>\n","    <tr>\n","      <th>1741</th>\n","      
<td>2010-03-12</td>\n","      <td>Woomera</td>\n","      <td>14.0</td>\n","      <td>30.7</td>\n","      <td>0.0</td>\n","      <td>12.0</td>\n","      <td>11.4</td>\n","      <td>E</td>\n","      <td>37.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>20.0</td>\n","      <td>13.0</td>\n","      <td>42.0</td>\n","      <td>26.0</td>\n","      <td>1028.3</td>\n","      <td>1025.0</td>\n","      <td>1.0</td>\n","      <td>1.0</td>\n","      <td>17.9</td>\n","      <td>28.9</td>\n","    </tr>\n","    <tr>\n","      <th>2380</th>\n","      <td>2013-01-15</td>\n","      <td>Woomera</td>\n","      <td>19.0</td>\n","      <td>36.3</td>\n","      <td>0.0</td>\n","      <td>13.0</td>\n","      <td>13.0</td>\n","      <td>ENE</td>\n","      <td>33.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>24.0</td>\n","      <td>15.0</td>\n","      <td>17.0</td>\n","      <td>6.0</td>\n","      <td>1014.2</td>\n","      <td>NaN</td>\n","      <td>2.0</td>\n","      <td>1.0</td>\n","      <td>25.6</td>\n","      <td>35.0</td>\n","    </tr>\n","    <tr>\n","      <th>975</th>\n","      <td>2015-02-26</td>\n","      <td>Woomera</td>\n","      <td>17.5</td>\n","      <td>36.5</td>\n","      <td>0.0</td>\n","      <td>15.0</td>\n","      <td>NaN</td>\n","      <td>SSE</td>\n","      <td>39.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>20.0</td>\n","      <td>20.0</td>\n","      <td>37.0</td>\n","      <td>6.0</td>\n","      <td>1011.3</td>\n","      <td>1008.5</td>\n","      <td>1.0</td>\n","      <td>0.0</td>\n","      <td>23.1</td>\n","      <td>34.7</td>\n","    </tr>\n","    <tr>\n","      <th>1716</th>\n","      <td>2016-12-19</td>\n","      <td>Woomera</td>\n","      <td>18.9</td>\n","      <td>33.0</td>\n","      <td>0.0</td>\n","      <td>10.8</td>\n","      <td>NaN</td>\n","      <td>WSW</td>\n","      <td>46.0</td>\n","      <td>NNE</td>\n","      <td>...</td>\n","      <td>26.0</td>\n","      <td>24.0</td>\n","      
<td>20.0</td>\n","      <td>32.0</td>\n","      <td>1012.1</td>\n","      <td>1009.8</td>\n","      <td>5.0</td>\n","      <td>7.0</td>\n","      <td>27.2</td>\n","      <td>29.7</td>\n","    </tr>\n","    <tr>\n","      <th>2569</th>\n","      <td>2009-08-06</td>\n","      <td>Woomera</td>\n","      <td>10.7</td>\n","      <td>25.1</td>\n","      <td>0.0</td>\n","      <td>9.6</td>\n","      <td>10.4</td>\n","      <td>NNW</td>\n","      <td>57.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>24.0</td>\n","      <td>37.0</td>\n","      <td>32.0</td>\n","      <td>12.0</td>\n","      <td>1017.6</td>\n","      <td>1012.5</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>15.8</td>\n","      <td>24.5</td>\n","    </tr>\n","    <tr>\n","      <th>355</th>\n","      <td>2013-06-23</td>\n","      <td>Woomera</td>\n","      <td>7.8</td>\n","      <td>16.0</td>\n","      <td>1.2</td>\n","      <td>3.2</td>\n","      <td>7.4</td>\n","      <td>W</td>\n","      <td>37.0</td>\n","      <td>WNW</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>26.0</td>\n","      <td>97.0</td>\n","      <td>54.0</td>\n","      <td>1020.9</td>\n","      <td>1019.4</td>\n","      <td>5.0</td>\n","      <td>6.0</td>\n","      <td>9.7</td>\n","      <td>15.6</td>\n","    </tr>\n","    <tr>\n","      <th>592</th>\n","      <td>2013-04-03</td>\n","      <td>Woomera</td>\n","      <td>15.2</td>\n","      <td>26.9</td>\n","      <td>0.0</td>\n","      <td>6.8</td>\n","      <td>10.7</td>\n","      <td>SSE</td>\n","      <td>41.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>22.0</td>\n","      <td>13.0</td>\n","      <td>82.0</td>\n","      <td>33.0</td>\n","      <td>1023.3</td>\n","      <td>1021.0</td>\n","      <td>1.0</td>\n","      <td>4.0</td>\n","      <td>17.4</td>\n","      <td>26.2</td>\n","    </tr>\n","    <tr>\n","      <th>343</th>\n","      <td>2009-12-06</td>\n","      <td>Woomera</td>\n","      <td>16.1</td>\n","      
<td>33.1</td>\n","      <td>0.0</td>\n","      <td>12.8</td>\n","      <td>13.1</td>\n","      <td>SSW</td>\n","      <td>39.0</td>\n","      <td>ESE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>17.0</td>\n","      <td>42.0</td>\n","      <td>12.0</td>\n","      <td>1016.7</td>\n","      <td>1013.4</td>\n","      <td>4.0</td>\n","      <td>6.0</td>\n","      <td>22.8</td>\n","      <td>32.1</td>\n","    </tr>\n","    <tr>\n","      <th>1994</th>\n","      <td>2015-06-06</td>\n","      <td>Woomera</td>\n","      <td>6.6</td>\n","      <td>20.2</td>\n","      <td>0.0</td>\n","      <td>6.6</td>\n","      <td>9.0</td>\n","      <td>NNE</td>\n","      <td>31.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>11.0</td>\n","      <td>17.0</td>\n","      <td>65.0</td>\n","      <td>39.0</td>\n","      <td>1030.1</td>\n","      <td>1026.3</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>12.7</td>\n","      <td>19.4</td>\n","    </tr>\n","    <tr>\n","      <th>2811</th>\n","      <td>2009-03-23</td>\n","      <td>Woomera</td>\n","      <td>13.4</td>\n","      <td>30.7</td>\n","      <td>0.0</td>\n","      <td>14.6</td>\n","      <td>10.8</td>\n","      <td>SSE</td>\n","      <td>35.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>26.0</td>\n","      <td>9.0</td>\n","      <td>48.0</td>\n","      <td>16.0</td>\n","      <td>1015.1</td>\n","      <td>1012.2</td>\n","      <td>1.0</td>\n","      <td>1.0</td>\n","      <td>16.6</td>\n","      <td>28.6</td>\n","    </tr>\n","    <tr>\n","      <th>1629</th>\n","      <td>2010-07-17</td>\n","      <td>Woomera</td>\n","      <td>5.1</td>\n","      <td>17.0</td>\n","      <td>0.0</td>\n","      <td>2.0</td>\n","      <td>9.5</td>\n","      <td>N</td>\n","      <td>37.0</td>\n","      <td>NNE</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>15.0</td>\n","      <td>72.0</td>\n","      <td>43.0</td>\n","      <td>1026.4</td>\n","      
<td>1022.4</td>\n","      <td>3.0</td>\n","      <td>4.0</td>\n","      <td>9.4</td>\n","      <td>15.8</td>\n","    </tr>\n","    <tr>\n","      <th>320</th>\n","      <td>2014-11-04</td>\n","      <td>Woomera</td>\n","      <td>17.4</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>12.0</td>\n","      <td>11.0</td>\n","      <td>S</td>\n","      <td>54.0</td>\n","      <td>WNW</td>\n","      <td>...</td>\n","      <td>13.0</td>\n","      <td>30.0</td>\n","      <td>5.0</td>\n","      <td>24.0</td>\n","      <td>1016.2</td>\n","      <td>1016.1</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>25.7</td>\n","      <td>28.1</td>\n","    </tr>\n","    <tr>\n","      <th>1072</th>\n","      <td>2016-12-04</td>\n","      <td>Woomera</td>\n","      <td>24.2</td>\n","      <td>38.7</td>\n","      <td>0.0</td>\n","      <td>10.6</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>80.0</td>\n","      <td>SSW</td>\n","      <td>...</td>\n","      <td>13.0</td>\n","      <td>24.0</td>\n","      <td>43.0</td>\n","      <td>23.0</td>\n","      <td>1008.4</td>\n","      <td>1004.5</td>\n","      <td>7.0</td>\n","      <td>3.0</td>\n","      <td>28.3</td>\n","      <td>36.4</td>\n","    </tr>\n","    <tr>\n","      <th>2223</th>\n","      <td>2009-05-08</td>\n","      <td>Woomera</td>\n","      <td>9.2</td>\n","      <td>20.6</td>\n","      <td>0.0</td>\n","      <td>5.2</td>\n","      <td>10.4</td>\n","      <td>ESE</td>\n","      <td>37.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>19.0</td>\n","      <td>64.0</td>\n","      <td>34.0</td>\n","      <td>1030.5</td>\n","      <td>1026.9</td>\n","      <td>0.0</td>\n","      <td>1.0</td>\n","      <td>13.7</td>\n","      <td>20.1</td>\n","    </tr>\n","    <tr>\n","      <th>1984</th>\n","      <td>2014-05-26</td>\n","      <td>Woomera</td>\n","      <td>15.5</td>\n","      <td>23.6</td>\n","      <td>0.0</td>\n","      <td>24.0</td>\n","      
<td>NaN</td>\n","      <td>NNW</td>\n","      <td>43.0</td>\n","      <td>NNE</td>\n","      <td>...</td>\n","      <td>9.0</td>\n","      <td>26.0</td>\n","      <td>49.0</td>\n","      <td>37.0</td>\n","      <td>1014.2</td>\n","      <td>1010.3</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>18.0</td>\n","      <td>21.5</td>\n","    </tr>\n","    <tr>\n","      <th>1592</th>\n","      <td>2012-01-10</td>\n","      <td>Woomera</td>\n","      <td>16.8</td>\n","      <td>26.7</td>\n","      <td>0.0</td>\n","      <td>10.0</td>\n","      <td>5.3</td>\n","      <td>SW</td>\n","      <td>46.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>20.0</td>\n","      <td>22.0</td>\n","      <td>52.0</td>\n","      <td>33.0</td>\n","      <td>1019.1</td>\n","      <td>1016.8</td>\n","      <td>4.0</td>\n","      <td>6.0</td>\n","      <td>18.3</td>\n","      <td>24.9</td>\n","    </tr>\n","    <tr>\n","      <th>2824</th>\n","      <td>2015-11-03</td>\n","      <td>Woomera</td>\n","      <td>16.2</td>\n","      <td>28.5</td>\n","      <td>7.8</td>\n","      <td>4.2</td>\n","      <td>4.5</td>\n","      <td>WSW</td>\n","      <td>80.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>26.0</td>\n","      <td>50.0</td>\n","      <td>76.0</td>\n","      <td>53.0</td>\n","      <td>1009.6</td>\n","      <td>1006.8</td>\n","      <td>6.0</td>\n","      <td>7.0</td>\n","      <td>20.5</td>\n","      <td>26.2</td>\n","    </tr>\n","    <tr>\n","      <th>1005</th>\n","      <td>2010-05-14</td>\n","      <td>Woomera</td>\n","      <td>3.9</td>\n","      <td>19.3</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>10.5</td>\n","      <td>NE</td>\n","      <td>33.0</td>\n","      <td>ENE</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>13.0</td>\n","      <td>43.0</td>\n","      <td>19.0</td>\n","      <td>1020.2</td>\n","      <td>1016.4</td>\n","      <td>1.0</td>\n","      <td>1.0</td>\n","      
<td>11.5</td>\n","      <td>18.5</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>3500 rows × 21 columns</p>\n","</div>"],"text/plain":["            Date  Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","2796  2015-03-24  Adelaide     12.3     19.3       0.0          5.0       NaN   \n","2975  2012-08-17  Adelaide      7.8     13.2      17.6          0.8       NaN   \n","775   2013-03-16  Adelaide     17.4     23.8       NaN          NaN       9.7   \n","861   2011-07-12  Adelaide      7.9     11.4       0.0          1.0       0.5   \n","2906  2015-08-24  Adelaide      9.2     14.3       0.0          NaN       NaN   \n","2900  2009-09-17  Adelaide     14.2     17.4       8.8          2.0       7.1   \n","902   2008-12-10  Adelaide     14.2     28.0       0.0          6.4      12.5   \n","919   2011-10-12  Adelaide      7.7     19.9       0.0          3.4      11.4   \n","936   2014-03-11  Adelaide     22.3     32.2       0.4         20.6       3.2   \n","942   2015-02-23  Adelaide     20.6     26.5       0.0         16.2       NaN   \n","999   2009-11-30  Adelaide     13.1     22.4       0.6         13.6      10.4   \n","1013  2012-08-23  Adelaide      8.1     12.4       6.0          1.8       NaN   \n","1042  2008-11-03  Adelaide     13.3     21.2       0.0         15.2      10.0   \n","701   2009-01-08  Adelaide     14.3     23.8       0.0          7.4      12.7   \n","109   2012-05-02  Adelaide      9.4     16.5      12.4          0.8       NaN   \n","227   2016-08-31  Adelaide     11.9     16.8       1.0          NaN       NaN   \n","1190  2015-07-01  Adelaide      4.7     14.5       0.0          NaN       NaN   \n","2701  2013-05-16  Adelaide     10.7     17.5       7.0          1.4       7.1   \n","1227  2016-06-22  Adelaide     10.2     16.5      11.8          NaN       NaN   \n","1234  2017-01-09  Adelaide     20.2     30.4       0.0          NaN       NaN   \n","1266  2008-11-07  Adelaide     18.3     22.5       0.2          8.0     
  1.4   \n","1282  2012-09-03  Adelaide      7.7     22.9       0.0          7.0       NaN   \n","1289  2010-03-09  Adelaide     14.3     22.0       1.6         16.4       7.8   \n","86    2010-11-08  Adelaide     13.7     23.1       0.0         16.0      12.2   \n","83    2012-10-08  Adelaide     10.2     23.6       0.2         10.0       NaN   \n","1356  2014-03-05  Adelaide     16.7     24.8       0.0          6.6      11.7   \n","1390  2015-10-04  Adelaide     15.4     29.7       0.0          NaN       NaN   \n","107   2015-10-01  Adelaide      7.8     24.4       0.0          NaN       NaN   \n","3403  2015-12-05  Adelaide     20.7     40.2       0.0          NaN       NaN   \n","3023  2012-11-06  Adelaide     14.7     22.8       8.0          3.2       NaN   \n","...          ...       ...      ...      ...       ...          ...       ...   \n","179   2015-12-20   Woomera     27.1     31.3       0.4         18.0       2.1   \n","212   2009-06-25   Woomera      9.1     18.4       0.0          3.8       4.4   \n","1392  2015-11-06   Woomera     13.1     28.3       0.0          NaN       NaN   \n","1436  2009-10-19   Woomera     12.6     30.0       0.0          7.8      12.3   \n","1431  2012-01-16   Woomera     18.9     38.8       0.0         12.0      12.4   \n","204   2014-02-05   Woomera     23.1     38.6       0.0         19.8       6.3   \n","225   2013-08-23   Woomera      6.5     17.7       0.0          3.8       3.6   \n","400   2016-02-09   Woomera     20.2     39.4       0.0         17.6      12.8   \n","1174  2011-07-27   Woomera      4.9     20.9       0.0          3.2      10.0   \n","3413  2009-03-27   Woomera     15.2     30.0       0.0          8.5      11.2   \n","396   2013-01-01   Woomera     21.5     40.4       0.0         42.2       NaN   \n","2867  2009-10-07   Woomera      9.0     21.5       0.0          7.8      11.9   \n","1741  2010-03-12   Woomera     14.0     30.7       0.0         12.0      11.4   \n","2380  2013-01-15   Woomera     
19.0     36.3       0.0         13.0      13.0   \n","975   2015-02-26   Woomera     17.5     36.5       0.0         15.0       NaN   \n","1716  2016-12-19   Woomera     18.9     33.0       0.0         10.8       NaN   \n","2569  2009-08-06   Woomera     10.7     25.1       0.0          9.6      10.4   \n","355   2013-06-23   Woomera      7.8     16.0       1.2          3.2       7.4   \n","592   2013-04-03   Woomera     15.2     26.9       0.0          6.8      10.7   \n","343   2009-12-06   Woomera     16.1     33.1       0.0         12.8      13.1   \n","1994  2015-06-06   Woomera      6.6     20.2       0.0          6.6       9.0   \n","2811  2009-03-23   Woomera     13.4     30.7       0.0         14.6      10.8   \n","1629  2010-07-17   Woomera      5.1     17.0       0.0          2.0       9.5   \n","320   2014-11-04   Woomera     17.4     29.8       0.0         12.0      11.0   \n","1072  2016-12-04   Woomera     24.2     38.7       0.0         10.6       NaN   \n","2223  2009-05-08   Woomera      9.2     20.6       0.0          5.2      10.4   \n","1984  2014-05-26   Woomera     15.5     23.6       0.0         24.0       NaN   \n","1592  2012-01-10   Woomera     16.8     26.7       0.0         10.0       5.3   \n","2824  2015-11-03   Woomera     16.2     28.5       7.8          4.2       4.5   \n","1005  2010-05-14   Woomera      3.9     19.3       0.0          5.8      10.5   \n","\n","     WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  \\\n","2796           S           39.0          S   ...            13.0   \n","2975          SW           61.0         SW   ...            20.0   \n","775          SSE           46.0          S   ...             9.0   \n","861            N           20.0        NNE   ...             7.0   \n","2906          SE           48.0         SE   ...            17.0   \n","2900          SW           41.0        SSW   ...            15.0   \n","902           SE           48.0          E   ...            
13.0   \n","919            W           30.0          E   ...            13.0   \n","936            W           65.0        ESE   ...             9.0   \n","942          SSE           48.0          S   ...            20.0   \n","999          SSE           37.0        SSE   ...            17.0   \n","1013         WNW           74.0         NW   ...            26.0   \n","1042         SSE           39.0        SSW   ...            15.0   \n","701           SE           37.0         SE   ...            17.0   \n","109          SSE           39.0          S   ...             9.0   \n","227           SW           28.0        WNW   ...            11.0   \n","1190         WSW           20.0         NE   ...             6.0   \n","2701          SW           35.0         NE   ...             6.0   \n","1227         WNW           39.0          W   ...             9.0   \n","1234          SW           24.0        ESE   ...             6.0   \n","1266         WNW           56.0          N   ...            17.0   \n","1282          SE           56.0          N   ...            13.0   \n","1289          SW           39.0          S   ...            11.0   \n","86             W           31.0        WNW   ...            13.0   \n","83             E           33.0        NNE   ...            11.0   \n","1356           S           37.0          S   ...            15.0   \n","1390         WNW           20.0        NaN   ...             0.0   \n","107          WSW           22.0          N   ...             9.0   \n","3403          SE           31.0         NE   ...             9.0   \n","3023          NW           31.0          W   ...            17.0   \n","...          ...            ...        ...   ...             ...   \n","179           SE           56.0        ENE   ...            11.0   \n","212            N           41.0        NNE   ...            19.0   \n","1392          SE           33.0        SSE   ...            
19.0   \n","1436           N           44.0          N   ...            13.0   \n","1431           N           41.0        ENE   ...            20.0   \n","204           SE           37.0        ESE   ...            17.0   \n","225           SW           43.0        WSW   ...            22.0   \n","400          SSE           43.0         SE   ...            24.0   \n","1174         NaN            NaN        NaN   ...             0.0   \n","3413         SSE           43.0        SSE   ...            20.0   \n","396           SE           54.0        ESE   ...            20.0   \n","2867          SE           35.0         SE   ...            22.0   \n","1741           E           37.0         SE   ...            20.0   \n","2380         ENE           33.0          E   ...            24.0   \n","975          SSE           39.0         SE   ...            20.0   \n","1716         WSW           46.0        NNE   ...            26.0   \n","2569         NNW           57.0          N   ...            24.0   \n","355            W           37.0        WNW   ...            19.0   \n","592          SSE           41.0         SE   ...            22.0   \n","343          SSW           39.0        ESE   ...            17.0   \n","1994         NNE           31.0         NE   ...            11.0   \n","2811         SSE           35.0         SE   ...            26.0   \n","1629           N           37.0        NNE   ...            15.0   \n","320            S           54.0        WNW   ...            13.0   \n","1072         NNW           80.0        SSW   ...            13.0   \n","2223         ESE           37.0         SE   ...            19.0   \n","1984         NNW           43.0        NNE   ...             9.0   \n","1592          SW           46.0          S   ...            20.0   \n","2824         WSW           80.0         NE   ...            26.0   \n","1005          NE           33.0        ENE   ...            
15.0   \n","\n","      WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  \\\n","2796          19.0         59.0         47.0       1022.2       1021.4   \n","2975          28.0         76.0         47.0       1012.5       1014.7   \n","775           19.0         63.0         57.0       1019.9       1020.5   \n","861            7.0         70.0         59.0       1028.7       1025.7   \n","2906          19.0         64.0         42.0       1024.7       1024.1   \n","2900          20.0         82.0         56.0       1014.9       1018.0   \n","902           13.0         56.0         35.0       1010.8       1008.9   \n","919           15.0         56.0         43.0       1021.2       1018.1   \n","936            9.0         53.0         78.0       1017.0       1017.2   \n","942           22.0         61.0         43.0       1015.3       1016.5   \n","999           15.0         66.0         43.0       1017.9       1016.8   \n","1013          37.0         74.0         78.0       1002.6       1005.6   \n","1042          20.0         50.0         39.0       1021.9       1020.1   \n","701           22.0         45.0         30.0       1019.9       1019.2   \n","109           15.0         53.0         47.0       1029.6       1028.5   \n","227           13.0         80.0         79.0       1018.5       1017.7   \n","1190          13.0         78.0         48.0       1030.6       1027.6   \n","2701          19.0         88.0         54.0       1016.3       1015.9   \n","1227          17.0         67.0         63.0       1010.3       1010.0   \n","1234           9.0         70.0         38.0       1012.7       1011.3   \n","1266          28.0         58.0         51.0       1001.0       1004.7   \n","1282          17.0         27.0         22.0       1021.1       1018.2   \n","1289          20.0         75.0         36.0       1021.2       1022.5   \n","86            17.0         61.0         38.0       1016.0       1016.3   \n","83            17.0         46.0 
        28.0       1016.7       1012.9   \n","1356          24.0         61.0         48.0       1019.3       1018.9   \n","1390           9.0         44.0         22.0       1024.0       1022.4   \n","107           15.0         35.0         23.0       1028.4       1028.6   \n","3403          13.0         18.0         13.0       1015.3       1012.1   \n","3023          15.0         64.0         46.0       1011.9       1011.6   \n","...            ...          ...          ...          ...          ...   \n","179           35.0         37.0         41.0       1007.7       1007.7   \n","212           20.0         54.0         90.0       1012.9       1008.8   \n","1392           7.0         65.0         36.0       1015.9       1013.2   \n","1436          22.0         20.0          8.0       1025.5       1021.3   \n","1431          20.0         10.0          5.0       1012.0       1008.7   \n","204            6.0         18.0         11.0       1012.1       1009.9   \n","225           26.0         76.0         46.0       1024.2       1022.9   \n","400            9.0         50.0         12.0       1016.8       1013.3   \n","1174          11.0         54.0         29.0       1026.8       1024.0   \n","3413          17.0         52.0         20.0       1024.9       1021.6   \n","396           26.0         17.0          3.0       1014.5       1012.1   \n","2867          15.0         52.0         20.0       1025.6       1024.3   \n","1741          13.0         42.0         26.0       1028.3       1025.0   \n","2380          15.0         17.0          6.0       1014.2          NaN   \n","975           20.0         37.0          6.0       1011.3       1008.5   \n","1716          24.0         20.0         32.0       1012.1       1009.8   \n","2569          37.0         32.0         12.0       1017.6       1012.5   \n","355           26.0         97.0         54.0       1020.9       1019.4   \n","592           13.0         82.0         33.0       1023.3       1021.0   \n","343 
          17.0         42.0         12.0       1016.7       1013.4   \n","1994          17.0         65.0         39.0       1030.1       1026.3   \n","2811           9.0         48.0         16.0       1015.1       1012.2   \n","1629          15.0         72.0         43.0       1026.4       1022.4   \n","320           30.0          5.0         24.0       1016.2       1016.1   \n","1072          24.0         43.0         23.0       1008.4       1004.5   \n","2223          19.0         64.0         34.0       1030.5       1026.9   \n","1984          26.0         49.0         37.0       1014.2       1010.3   \n","1592          22.0         52.0         33.0       1019.1       1016.8   \n","2824          50.0         76.0         53.0       1009.6       1006.8   \n","1005          13.0         43.0         19.0       1020.2       1016.4   \n","\n","      Cloud9am  Cloud3pm  Temp9am  Temp3pm  \n","2796       NaN       NaN     15.1     17.7  \n","2975       NaN       NaN      8.3     12.5  \n","775        NaN       NaN     19.1     20.7  \n","861        NaN       NaN      8.4     11.3  \n","2906       NaN       NaN      9.9     13.4  \n","2900       NaN       NaN     16.2     16.7  \n","902        NaN       NaN     19.3     25.6  \n","919        NaN       NaN     14.1     19.4  \n","936        NaN       NaN     25.6     22.8  \n","942        NaN       NaN     22.2     25.7  \n","999        NaN       NaN     16.1     20.9  \n","1013       NaN       NaN     10.7      7.4  \n","1042       NaN       NaN     14.8     19.9  \n","701        NaN       NaN     17.6     23.2  \n","109        NaN       NaN     13.5     14.6  \n","227        NaN       NaN     14.3     15.2  \n","1190       NaN       NaN      8.8     13.1  \n","2701       NaN       NaN     13.1     16.9  \n","1227       NaN       NaN     13.5     15.5  \n","1234       NaN       NaN     20.9     28.9  \n","1266       NaN       NaN     22.4     18.4  \n","1282       NaN       NaN     17.5     22.1  \n","1289       
NaN       NaN     16.8     21.6  \n","86         NaN       NaN     17.2     22.5  \n","83         NaN       NaN     15.1     22.6  \n","1356       NaN       NaN     20.8     23.7  \n","1390       NaN       NaN     18.3     27.4  \n","107        NaN       NaN     19.1     23.8  \n","3403       NaN       NaN     33.4     38.6  \n","3023       NaN       NaN     17.7     21.0  \n","...        ...       ...      ...      ...  \n","179        1.0       NaN     30.4     29.7  \n","212        6.0       8.0     13.2     14.2  \n","1392       NaN       NaN     17.6     26.0  \n","1436       0.0       0.0     19.9     28.7  \n","1431       0.0       0.0     27.6     37.9  \n","204        3.0       2.0     26.8     36.2  \n","225        1.0       7.0     13.2     17.3  \n","400        NaN       NaN     24.0     37.7  \n","1174       2.0       0.0     11.7     20.3  \n","3413       0.0       0.0     16.9     28.3  \n","396        0.0       0.0     28.1     39.6  \n","2867       0.0       0.0     12.4     19.9  \n","1741       1.0       1.0     17.9     28.9  \n","2380       2.0       1.0     25.6     35.0  \n","975        1.0       0.0     23.1     34.7  \n","1716       5.0       7.0     27.2     29.7  \n","2569       0.0       0.0     15.8     24.5  \n","355        5.0       6.0      9.7     15.6  \n","592        1.0       4.0     17.4     26.2  \n","343        4.0       6.0     22.8     32.1  \n","1994       0.0       0.0     12.7     19.4  \n","2811       1.0       1.0     16.6     28.6  \n","1629       3.0       4.0      9.4     15.8  \n","320        0.0       0.0     25.7     28.1  \n","1072       7.0       3.0     28.3     36.4  \n","2223       0.0       1.0     13.7     20.1  \n","1984       7.0       7.0     18.0     21.5  \n","1592       4.0       6.0     18.3     24.9  \n","2824       6.0       7.0     20.5     26.2  \n","1005       1.0       1.0     11.5     18.5  \n","\n","[3500 rows x 21 
columns]"]},"metadata":{"tags":[]},"execution_count":32}]},{"metadata":{"id":"J9ppQbTwJGxB","colab_type":"code","colab":{}},"cell_type":"code","source":["#不同地点上一段相似的时间的数据"],"execution_count":0,"outputs":[]},{"metadata":{"id":"o9LOtPB9JGxJ","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":194},"outputId":"f6c7e677-9c52-4e1f-a2ec-0c163124f881","executionInfo":{"status":"ok","timestamp":1546764351159,"user_tz":-480,"elapsed":1785,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.loc[Xtrain.iloc[:,0] == \"2015-08-24\",:]"],"execution_count":33,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Date</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed9am</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>2015-08-24</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      
<td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","    </tr>\n","    <tr>\n","      <th>2906</th>\n","      <td>2015-08-24</td>\n","      <td>Adelaide</td>\n","      <td>9.2</td>\n","      <td>14.3</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SE</td>\n","      <td>48.0</td>\n","      <td>SE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>19.0</td>\n","      <td>64.0</td>\n","      <td>42.0</td>\n","      <td>1024.7</td>\n","      <td>1024.1</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>9.9</td>\n","      <td>13.4</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>2 rows × 21 columns</p>\n","</div>"],"text/plain":["            Date   Location  MinTemp  MaxTemp  Rainfall  Evaporation  \\\n","0     2015-08-24  Katherine     17.5     36.0       0.0          8.8   \n","2906  2015-08-24   Adelaide      9.2     14.3       0.0          NaN   \n","\n","      Sunshine WindGustDir  WindGustSpeed WindDir9am   ...    WindSpeed9am  \\\n","0          NaN         ESE           26.0        NNW   ...            17.0   \n","2906       NaN          SE           48.0         SE   ...            
17.0   \n","\n","      WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  \\\n","0             15.0         57.0          NaN       1016.8       1012.2   \n","2906          19.0         64.0         42.0       1024.7       1024.1   \n","\n","      Cloud9am  Cloud3pm  Temp9am  Temp3pm  \n","0          0.0       NaN     27.5      NaN  \n","2906       NaN       NaN      9.9     13.4  \n","\n","[2 rows x 21 columns]"]},"metadata":{"tags":[]},"execution_count":33}]},{"metadata":{"id":"Ncm7jenYJGxT","colab_type":"code","colab":{}},"cell_type":"code","source":["#首先，日期不是独一无二的，日期有重复\n","#其次，在我们分训练集和测试集之后，日期也不是连续的，而是分散的\n","#某一年的某一天倾向于会下雨？或者倾向于不会下雨吗？\n","#不是日期影响了下雨与否，反而更多的是这一天的日照时间，湿度，温度等等这些因素影响了是否会下雨\n","#光看日期，其实感觉它对我们的判断并无直接影响\n","#如果我们把它当作连续型变量处理，那算法会人为它是一系列1~3000左右的数字，不会意识到这是日期"],"execution_count":0,"outputs":[]},{"metadata":{"id":"T_UuIRCVJGxc","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"3c1fe33b-a91f-44ac-fe52-787bf3e44a44","executionInfo":{"status":"ok","timestamp":1546764085210,"user_tz":-480,"elapsed":814,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.iloc[:,0].value_counts().count()\n","#如果我们把它当作分类型变量处理，类别太多，有2141类，如果换成数值型，会被直接当成连续型变量，如果做成哑变量，我们特征的维度会爆炸"],"execution_count":30,"outputs":[{"output_type":"execute_result","data":{"text/plain":["2141"]},"metadata":{"tags":[]},"execution_count":30}]},{"metadata":{"id":"994qZOFkJGxg","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":374},"outputId":"d13a7096-9d5a-4c5f-d590-8996a6484497","executionInfo":{"status":"ok","timestamp":1546764992701,"user_tz":-480,"elapsed":1034,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain[\"Rainfall\"].head(20)"],"execution_count":34,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0      0.0\n","1      0.0\n","2      0.0\n","3      0.0\n","4      
0.0\n","5      0.0\n","6      0.0\n","7      0.2\n","8      0.0\n","9      0.2\n","10     1.0\n","11     0.0\n","12     0.2\n","13     0.0\n","14     0.0\n","15     3.0\n","16     0.2\n","17     0.0\n","18    35.2\n","19     0.0\n","Name: Rainfall, dtype: float64"]},"metadata":{"tags":[]},"execution_count":34}]},{"metadata":{"id":"jmJ9wU4YJGxm","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"c2706d22-ad34-4c06-eb9e-48c1f6c0aa8b","executionInfo":{"status":"ok","timestamp":1546764997552,"user_tz":-480,"elapsed":776,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain[\"Rainfall\"].isnull().sum()\n","#假设你没有下雨\n","#复制你的空值"],"execution_count":35,"outputs":[{"output_type":"execute_result","data":{"text/plain":["33"]},"metadata":{"tags":[]},"execution_count":35}]},{"metadata":{"id":"M-xwdI4MJGxv","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":221},"outputId":"abb1ceae-6b70-425b-a36f-3e1ac828791b","executionInfo":{"status":"ok","timestamp":1546765002265,"user_tz":-480,"elapsed":741,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.loc[Xtrain[\"Rainfall\"] >= 1,\"RainToday\"] = \"Yes\"\n","Xtrain.loc[Xtrain[\"Rainfall\"] < 1,\"RainToday\"] = \"No\"\n","Xtrain.loc[Xtrain[\"Rainfall\"] == np.nan,\"RainToday\"] = np.nan"],"execution_count":36,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py:357: SettingWithCopyWarning: \n","A value is trying to be set on a copy of a slice from a DataFrame.\n","Try using .loc[row_indexer,col_indexer] = value instead\n","\n","See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n","  self.obj[key] = _infer_fill_value(value)\n","/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py:537: SettingWithCopyWarning: 
\n","A value is trying to be set on a copy of a slice from a DataFrame.\n","Try using .loc[row_indexer,col_indexer] = value instead\n","\n","See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n","  self.obj[item] = s\n"],"name":"stderr"}]},{"metadata":{"id":"L2sd8HFlJGx0","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":338},"outputId":"543d67f8-43aa-4cbe-d9ac-0d10f5b5b6cf","executionInfo":{"status":"ok","timestamp":1546765007015,"user_tz":-480,"elapsed":747,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.head()"],"execution_count":37,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Date</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>2015-08-24</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","  
    <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>2016-12-10</td>\n","      <td>Tuggeranong</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>2010-04-18</td>\n","      <td>Albany</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>2009-11-26</td>\n","      <td>Sale</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      
<td>27.5</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>2014-04-25</td>\n","      <td>Mildura</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["         Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0  2015-08-24    Katherine     17.5     36.0       0.0          8.8       NaN   \n","1  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n","2  2010-04-18       Albany     13.0     22.6       0.0          3.8      10.4   \n","3  2009-11-26         Sale     13.9     29.8       0.0          5.8       5.1   \n","4  2014-04-25      Mildura      6.0     23.5       0.0          2.8       8.6   \n","\n","  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n","0         ESE           26.0        NNW    ...             15.0         57.0   \n","1         NNW           33.0         NE    ...             17.0         59.0   \n","2         NaN            NaN         NE    ...             31.0         79.0   \n","3           S           37.0          N    ...             28.0         82.0   \n","4         NNE           24.0          E    ...             
15.0         58.0   \n","\n","   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n","0          NaN       1016.8       1012.2       0.0       NaN     27.5   \n","1         31.0       1020.4       1017.5       NaN       NaN     14.6   \n","2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n","3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n","4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n","\n","   Temp3pm  RainToday  \n","0      NaN         No  \n","1     23.6         No  \n","2     20.8         No  \n","3     27.5         No  \n","4     22.4         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":37}]},{"metadata":{"id":"PVQqKUIyJGx4","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":68},"outputId":"5ed23b2c-96eb-4055-aaa0-08cb10df82fd","executionInfo":{"status":"ok","timestamp":1546765014217,"user_tz":-480,"elapsed":771,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.loc[:,\"RainToday\"].value_counts()"],"execution_count":38,"outputs":[{"output_type":"execute_result","data":{"text/plain":["No     2642\n","Yes     825\n","Name: RainToday, dtype: int64"]},"metadata":{"tags":[]},"execution_count":38}]},{"metadata":{"id":"3usOhbAGJGx8","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":221},"outputId":"24bf6856-797c-4e20-e0ec-9d145f103790","executionInfo":{"status":"ok","timestamp":1546765019665,"user_tz":-480,"elapsed":829,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtest.loc[Xtest[\"Rainfall\"] >= 1,\"RainToday\"] = \"Yes\"\n","Xtest.loc[Xtest[\"Rainfall\"] < 1,\"RainToday\"] = \"No\"\n","# NaN never compares equal to anything (not even itself), so `== np.nan` matched no rows; isnull() selects the rows whose Rainfall is missing\n","Xtest.loc[Xtest[\"Rainfall\"].isnull(),\"RainToday\"] = 
np.nan"],"execution_count":39,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py:357: SettingWithCopyWarning: \n","A value is trying to be set on a copy of a slice from a DataFrame.\n","Try using .loc[row_indexer,col_indexer] = value instead\n","\n","See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n","  self.obj[key] = _infer_fill_value(value)\n","/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py:537: SettingWithCopyWarning: \n","A value is trying to be set on a copy of a slice from a DataFrame.\n","Try using .loc[row_indexer,col_indexer] = value instead\n","\n","See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n","  self.obj[item] = s\n"],"name":"stderr"}]},{"metadata":{"id":"E0Hbs0PJJGyA","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":338},"outputId":"8733ee40-9ac2-4625-c239-d68e08efbd54","executionInfo":{"status":"ok","timestamp":1546765023938,"user_tz":-480,"elapsed":1118,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.head()"],"execution_count":40,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Date</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      
<th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>2015-08-24</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>2016-12-10</td>\n","      <td>Tuggeranong</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>2010-04-18</td>\n","      <td>Albany</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n"," 
     <td>17.5</td>\n","      <td>20.8</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>2009-11-26</td>\n","      <td>Sale</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>2014-04-25</td>\n","      <td>Mildura</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["         Date     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0  2015-08-24    Katherine     17.5     36.0       0.0          8.8       NaN   \n","1  2016-12-10  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n","2  2010-04-18       Albany     13.0     22.6       0.0          3.8      10.4   \n","3  2009-11-26         Sale     13.9     29.8       0.0          5.8       5.1   \n","4  2014-04-25      Mildura      6.0     23.5       0.0          2.8       8.6   \n","\n","  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n","0         ESE           26.0        NNW    ...             15.0         57.0   \n","1         NNW           33.0         NE    ...   
          17.0         59.0   \n","2         NaN            NaN         NE    ...             31.0         79.0   \n","3           S           37.0          N    ...             28.0         82.0   \n","4         NNE           24.0          E    ...             15.0         58.0   \n","\n","   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n","0          NaN       1016.8       1012.2       0.0       NaN     27.5   \n","1         31.0       1020.4       1017.5       NaN       NaN     14.6   \n","2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n","3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n","4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n","\n","   Temp3pm  RainToday  \n","0      NaN         No  \n","1     23.6         No  \n","2     20.8         No  \n","3     27.5         No  \n","4     22.4         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":40}]},{"metadata":{"id":"MlB7oI7xJGyE","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":338},"outputId":"63bec44e-da83-4278-d3ea-bc1348cb1f5a","executionInfo":{"status":"ok","timestamp":1546765029009,"user_tz":-480,"elapsed":1180,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtest.head()"],"execution_count":41,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Date</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      
<th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>2016-01-23</td>\n","      <td>NorahHead</td>\n","      <td>22.0</td>\n","      <td>27.8</td>\n","      <td>25.2</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SSW</td>\n","      <td>57.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>37.0</td>\n","      <td>91.0</td>\n","      <td>86.0</td>\n","      <td>1006.6</td>\n","      <td>1008.1</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>26.2</td>\n","      <td>23.1</td>\n","      <td>Yes</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>2009-03-05</td>\n","      <td>MountGambier</td>\n","      <td>12.0</td>\n","      <td>18.6</td>\n","      <td>2.2</td>\n","      <td>3.0</td>\n","      <td>7.8</td>\n","      <td>SW</td>\n","      <td>52.0</td>\n","      <td>SW</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>88.0</td>\n","      <td>62.0</td>\n","      <td>1020.2</td>\n","      <td>1019.9</td>\n","      <td>8.0</td>\n","      <td>7.0</td>\n","      <td>14.8</td>\n","      <td>17.5</td>\n","      <td>Yes</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>2010-03-05</td>\n","      <td>MountGinini</td>\n","      <td>9.1</td>\n","      <td>13.3</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>41.0</td>\n","      <td>NaN</td>\n","      <td>...</td>\n","      <td>NaN</td>\n","      
<td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>2013-10-26</td>\n","      <td>Wollongong</td>\n","      <td>13.1</td>\n","      <td>20.3</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SW</td>\n","      <td>33.0</td>\n","      <td>W</td>\n","      <td>...</td>\n","      <td>24.0</td>\n","      <td>40.0</td>\n","      <td>51.0</td>\n","      <td>1021.3</td>\n","      <td>1019.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>16.8</td>\n","      <td>19.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>2016-11-28</td>\n","      <td>Sale</td>\n","      <td>12.2</td>\n","      <td>20.0</td>\n","      <td>0.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>E</td>\n","      <td>33.0</td>\n","      <td>SW</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>92.0</td>\n","      <td>69.0</td>\n","      <td>1015.6</td>\n","      <td>1013.2</td>\n","      <td>8.0</td>\n","      <td>4.0</td>\n","      <td>13.6</td>\n","      <td>19.0</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["         Date      Location  MinTemp  MaxTemp  Rainfall  Evaporation  \\\n","0  2016-01-23     NorahHead     22.0     27.8      25.2          NaN   \n","1  2009-03-05  MountGambier     12.0     18.6       2.2          3.0   \n","2  2010-03-05   MountGinini      9.1     13.3       NaN          NaN   \n","3  2013-10-26    Wollongong     13.1     20.3       0.0          NaN   \n","4  2016-11-28          Sale     12.2     20.0       0.4          NaN   \n","\n","   Sunshine WindGustDir  WindGustSpeed WindDir9am    ...     
WindSpeed3pm  \\\n","0       NaN         SSW           57.0          S    ...             37.0   \n","1       7.8          SW           52.0         SW    ...             28.0   \n","2       NaN          NE           41.0        NaN    ...              NaN   \n","3       NaN          SW           33.0          W    ...             24.0   \n","4       NaN           E           33.0         SW    ...             19.0   \n","\n","   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n","0         91.0         86.0       1006.6       1008.1       NaN       NaN   \n","1         88.0         62.0       1020.2       1019.9       8.0       7.0   \n","2          NaN          NaN          NaN          NaN       NaN       NaN   \n","3         40.0         51.0       1021.3       1019.5       NaN       NaN   \n","4         92.0         69.0       1015.6       1013.2       8.0       4.0   \n","\n","   Temp9am  Temp3pm  RainToday  \n","0     26.2     23.1        Yes  \n","1     14.8     17.5        Yes  \n","2      NaN      NaN        NaN  \n","3     16.8     19.6         No  \n","4     13.6     19.0         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":41}]},{"metadata":{"id":"FFILvS5YYbVq","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"5823397f-1c06-4fbe-be54-85d874ad2172","executionInfo":{"status":"ok","timestamp":1546765059715,"user_tz":-480,"elapsed":734,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.loc[0,\"Date\"].split(\"-\")"],"execution_count":43,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['2015', '08', 
'24']"]},"metadata":{"tags":[]},"execution_count":43}]},{"metadata":{"id":"OCM9ZJGcJGyG","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"55e1999f-c038-44a9-e7fe-0d7bbc5b26b9","executionInfo":{"status":"ok","timestamp":1546765041145,"user_tz":-480,"elapsed":851,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["int(Xtrain.loc[0,\"Date\"].split(\"-\")[1]) #提取出月份"],"execution_count":42,"outputs":[{"output_type":"execute_result","data":{"text/plain":["8"]},"metadata":{"tags":[]},"execution_count":42}]},{"metadata":{"id":"jMMAnyERJGyI","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":119},"outputId":"451736b3-e5ee-4835-a2df-e9a38b748d87","executionInfo":{"status":"ok","timestamp":1546765116327,"user_tz":-480,"elapsed":729,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain[\"Date\"] = Xtrain[\"Date\"].apply(lambda x:int(x.split(\"-\")[1]))\n","#apply是对dataframe上的某一列进行处理的一个函数\n","#lambda x匿名函数，请在dataframe上这一列中的每一行帮我执行冒号后的命令"],"execution_count":44,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n","A value is trying to be set on a copy of a slice from a DataFrame.\n","Try using .loc[row_indexer,col_indexer] = value instead\n","\n","See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n","  \"\"\"Entry point for launching an IPython kernel.\n"],"name":"stderr"}]},{"metadata":{"id":"7xj50PjqJGyK","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":238},"outputId":"6955eef4-2c6b-4d8a-d9ec-3f110772d6b9","executionInfo":{"status":"ok","timestamp":1546765122628,"user_tz":-480,"elapsed":1146,"user":{"displayName":"Sen 
Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.loc[:,\"Date\"].value_counts()"],"execution_count":45,"outputs":[{"output_type":"execute_result","data":{"text/plain":["3     334\n","5     324\n","7     316\n","9     302\n","6     302\n","1     300\n","11    299\n","10    282\n","4     265\n","2     264\n","12    259\n","8     253\n","Name: Date, dtype: int64"]},"metadata":{"tags":[]},"execution_count":45}]},{"metadata":{"id":"sycbrFAaJGyN","colab_type":"code","colab":{}},"cell_type":"code","source":["#替换完毕后，我们需要修改列的名称\n","#rename是比较少用的，可以用来修改单个列名的函数\n","#我们通常都直接使用 df.columns = 某个列表 这样的形式来一次修改所有的列名\n","#但rename允许我们只修改某个单独的列\n","Xtrain = Xtrain.rename(columns={\"Date\":\"Month\"})"],"execution_count":0,"outputs":[]},{"metadata":{"id":"vHWaOwwaJGyQ","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":253},"outputId":"dd10708b-370a-4377-97be-c1686c3b82e6","executionInfo":{"status":"ok","timestamp":1546765231104,"user_tz":-480,"elapsed":856,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.head()"],"execution_count":47,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      
<th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>8</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>12</td>\n","      <td>Tuggeranong</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>4</td>\n","      <td>Albany</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>11</td>\n","     
 <td>Sale</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4</td>\n","      <td>Mildura</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0      8    Katherine     17.5     36.0       0.0          8.8       NaN   \n","1     12  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n","2      4       Albany     13.0     22.6       0.0          3.8      10.4   \n","3     11         Sale     13.9     29.8       0.0          5.8       5.1   \n","4      4      Mildura      6.0     23.5       0.0          2.8       8.6   \n","\n","  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n","0         ESE           26.0        NNW    ...             15.0         57.0   \n","1         NNW           33.0         NE    ...             17.0         59.0   \n","2         NaN            NaN         NE    ...             31.0         79.0   \n","3           S           37.0          N    ...             
28.0         82.0   \n","4         NNE           24.0          E    ...             15.0         58.0   \n","\n","   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n","0          NaN       1016.8       1012.2       0.0       NaN     27.5   \n","1         31.0       1020.4       1017.5       NaN       NaN     14.6   \n","2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n","3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n","4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n","\n","   Temp3pm  RainToday  \n","0      NaN         No  \n","1     23.6         No  \n","2     20.8         No  \n","3     27.5         No  \n","4     22.4         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":47}]},{"metadata":{"id":"EeF2QYqXJGyU","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":119},"outputId":"12a77612-0f5c-4d58-945c-b727b0faf3d5","executionInfo":{"status":"ok","timestamp":1546765285082,"user_tz":-480,"elapsed":708,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtest[\"Date\"] = Xtest[\"Date\"].apply(lambda x:int(x.split(\"-\")[1]))\n","Xtest = Xtest.rename(columns={\"Date\":\"Month\"})"],"execution_count":48,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n","A value is trying to be set on a copy of a slice from a DataFrame.\n","Try using .loc[row_indexer,col_indexer] = value instead\n","\n","See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n","  \"\"\"Entry point for launching an IPython 
kernel.\n"],"name":"stderr"}]},{"metadata":{"id":"6bFsyWmIJGyW","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":253},"outputId":"1c5da515-1e06-4b2a-b128-c47281cd9eab","executionInfo":{"status":"ok","timestamp":1546765287749,"user_tz":-480,"elapsed":789,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtest.head()"],"execution_count":49,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>1</td>\n","      <td>NorahHead</td>\n","      <td>22.0</td>\n","      <td>27.8</td>\n","      <td>25.2</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SSW</td>\n","      <td>57.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>37.0</td>\n","      <td>91.0</td>\n","      <td>86.0</td>\n","      <td>1006.6</td>\n","      <td>1008.1</td>\n","      <td>NaN</td>\n","      
<td>NaN</td>\n","      <td>26.2</td>\n","      <td>23.1</td>\n","      <td>Yes</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>3</td>\n","      <td>MountGambier</td>\n","      <td>12.0</td>\n","      <td>18.6</td>\n","      <td>2.2</td>\n","      <td>3.0</td>\n","      <td>7.8</td>\n","      <td>SW</td>\n","      <td>52.0</td>\n","      <td>SW</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>88.0</td>\n","      <td>62.0</td>\n","      <td>1020.2</td>\n","      <td>1019.9</td>\n","      <td>8.0</td>\n","      <td>7.0</td>\n","      <td>14.8</td>\n","      <td>17.5</td>\n","      <td>Yes</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>3</td>\n","      <td>MountGinini</td>\n","      <td>9.1</td>\n","      <td>13.3</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>41.0</td>\n","      <td>NaN</td>\n","      <td>...</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>10</td>\n","      <td>Wollongong</td>\n","      <td>13.1</td>\n","      <td>20.3</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SW</td>\n","      <td>33.0</td>\n","      <td>W</td>\n","      <td>...</td>\n","      <td>24.0</td>\n","      <td>40.0</td>\n","      <td>51.0</td>\n","      <td>1021.3</td>\n","      <td>1019.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>16.8</td>\n","      <td>19.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>11</td>\n","      <td>Sale</td>\n","      <td>12.2</td>\n","      <td>20.0</td>\n","      <td>0.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>E</td>\n","      <td>33.0</td>\n","      
<td>SW</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>92.0</td>\n","      <td>69.0</td>\n","      <td>1015.6</td>\n","      <td>1013.2</td>\n","      <td>8.0</td>\n","      <td>4.0</td>\n","      <td>13.6</td>\n","      <td>19.0</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month      Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0      1     NorahHead     22.0     27.8      25.2          NaN       NaN   \n","1      3  MountGambier     12.0     18.6       2.2          3.0       7.8   \n","2      3   MountGinini      9.1     13.3       NaN          NaN       NaN   \n","3     10    Wollongong     13.1     20.3       0.0          NaN       NaN   \n","4     11          Sale     12.2     20.0       0.4          NaN       NaN   \n","\n","  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n","0         SSW           57.0          S    ...             37.0         91.0   \n","1          SW           52.0         SW    ...             28.0         88.0   \n","2          NE           41.0        NaN    ...              NaN          NaN   \n","3          SW           33.0          W    ...             24.0         40.0   \n","4           E           33.0         SW    ...             
19.0         92.0   \n","\n","   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n","0         86.0       1006.6       1008.1       NaN       NaN     26.2   \n","1         62.0       1020.2       1019.9       8.0       7.0     14.8   \n","2          NaN          NaN          NaN       NaN       NaN      NaN   \n","3         51.0       1021.3       1019.5       NaN       NaN     16.8   \n","4         69.0       1015.6       1013.2       8.0       4.0     13.6   \n","\n","   Temp3pm  RainToday  \n","0     23.1        Yes  \n","1     17.5        Yes  \n","2      NaN        NaN  \n","3     19.6         No  \n","4     19.0         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":49}]},{"metadata":{"id":"nOW2T7JJZWFk","colab_type":"text"},"cell_type":"markdown","source":["## 3处理困难特征：地点"]},{"metadata":{"id":"wsa22vzYJGyY","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"44f6f9d7-b1cd-4e82-b8f1-7ca884d50139","executionInfo":{"status":"ok","timestamp":1546765717496,"user_tz":-480,"elapsed":683,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.loc[:,\"Location\"].value_counts().count()\n","#超过25个类别的分类型变量，都会被算法当成是连续型变量"],"execution_count":50,"outputs":[{"output_type":"execute_result","data":{"text/plain":["49"]},"metadata":{"tags":[]},"execution_count":50}]},{"metadata":{"id":"Yk4yALj5dPkz","colab_type":"code","colab":{}},"cell_type":"code","source":["citynames = Xtrain.loc[:,\"Location\"].unique()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"JVtOEGDXcpsB","colab_type":"text"},"cell_type":"markdown","source":["### 爬虫"]},{"metadata":{"id":"TAWUqG8Zcyhc","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":139},"outputId":"98e2c22f-e55c-41a9-96fe-27351f13b93f","executionInfo":{"status":"ok","timestamp":1546766229696,"user_tz":-480,"elapsed":6102,"user":{"displayName":"Sen 
Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["!pip install selenium"],"execution_count":52,"outputs":[{"output_type":"stream","text":["Collecting selenium\n","\u001b[?25l  Downloading https://files.pythonhosted.org/packages/80/d6/4294f0b4bce4de0abf13e17190289f9d0613b0a44e5dd6a7f5ca98459853/selenium-3.141.0-py2.py3-none-any.whl (904kB)\n","\u001b[K    100% |████████████████████████████████| 911kB 18.4MB/s \n","\u001b[?25hRequirement already satisfied: urllib3 in /usr/local/lib/python3.6/dist-packages (from selenium) (1.22)\n","Installing collected packages: selenium\n","Successfully installed selenium-3.141.0\n"],"name":"stdout"}]},{"metadata":{"id":"QNo52E3hcs-A","colab_type":"code","colab":{}},"cell_type":"code","source":["import time\n","from selenium import webdriver #导入需要的模块，其中爬虫使用的是selenium\n","import pandas as pd\n","import numpy as np"],"execution_count":0,"outputs":[]},{"metadata":{"id":"ZJXjPa43ctKn","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":831},"outputId":"233e1705-6576-4917-e3e8-37a6b2a37de2","executionInfo":{"status":"error","timestamp":1546766387597,"user_tz":-480,"elapsed":1096,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["df = pd.DataFrame(index=range(len(citynames))) #创建新dataframe用于存储爬取的数据\n","driver = webdriver.Chrome() #调用谷歌浏览器\n","time0 = time.time() #计时开始\n","#循环开始\n","for num, city in enumerate(citynames): #在城市名称中进行遍历\n","\tdriver.get('https://www.google.co.uk/webhp?hl=en&sa=X&ved=0ahUKEwimtcX24cTfAhUJE7wKHVkWB5AQPAgH')\n","\t#首先打开谷歌主页\n","\ttime.sleep(0.3)\n","\t#停留0.3秒让我们知道发生了什么\n","\tsearch_box = driver.find_element_by_name('q') #锁定谷歌的搜索输入框\n","\tsearch_box.send_keys('%s Australia Latitude and longitude' % (city)) #在输入框中输入“城市” 澳大利亚 经纬度\n","\tsearch_box.submit() #enter，确认开始搜索\n","\tresult = driver.find_element_by_xpath('//div[@class=\"Z0LcW\"]').text #？爬取需要的经纬度，就是这里，怎么获取的呢？\n","\tresultsplit = 
result.split(\" \") #将爬取的结果用split进行分割\n","\tdf.loc[num,\"City\"] = city #向提前创建好的df中输入爬取的数据，第一列是城市名\n","\tdf.loc[num,\"Latitude\"] = resultsplit[0] #第二列是纬度\n","\tdf.loc[num,\"Longitude\"] = resultsplit[2] #第三列是经度\n","\tdf.loc[num,\"Latitudedir\"] = resultsplit[1] #第四列是纬度的方向\n","\tdf.loc[num,\"Longitudedir\"] = resultsplit[3] #第五列是经度的方向\n","\tprint(\"%i webcrawler successful for city %s\" % (num,city)) #每次爬虫成功之后，就打印“城市”成功了\n","time.sleep(1) #全部爬取完毕后，停留1秒钟\n","driver.quit() #关闭浏览器\n","print(time.time() - time0) #打印所需的时间"],"execution_count":58,"outputs":[{"output_type":"error","ename":"WebDriverException","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)","\u001b[0;32m/usr/local/lib/python3.6/dist-packages/selenium/webdriver/common/service.py\u001b[0m in \u001b[0;36mstart\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     75\u001b[0m                                             \u001b[0mstderr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlog_file\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 76\u001b[0;31m                                             stdin=PIPE)\n\u001b[0m\u001b[1;32m     77\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/lib/python3.6/subprocess.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors)\u001b[0m\n\u001b[1;32m    708\u001b[0m                                 \u001b[0merrread\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0merrwrite\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 709\u001b[0;31m                                 restore_signals, start_new_session)\n\u001b[0m\u001b[1;32m    710\u001b[0m         \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/lib/python3.6/subprocess.py\u001b[0m in \u001b[0;36m_execute_child\u001b[0;34m(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)\u001b[0m\n\u001b[1;32m   1343\u001b[0m                             \u001b[0merr_msg\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m': '\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mrepr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr_filename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1344\u001b[0;31m                     \u001b[0;32mraise\u001b[0m \u001b[0mchild_exception_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merrno_num\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merr_msg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merr_filename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1345\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mchild_exception_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0merr_msg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'chromedriver': 'chromedriver'","\nDuring handling of the above exception, another exception occurred:\n","\u001b[0;31mWebDriverException\u001b[0m                        Traceback (most recent call last)","\u001b[0;32m<ipython-input-58-78c1f3430dcc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mdf\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcitynames\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#创建新dataframe用于存储爬取的数据\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdriver\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mwebdriver\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mChrome\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#调用谷歌浏览器\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0mtime0\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#计时开始\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;31m#循环开始\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mnum\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcity\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcitynames\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m#在城市名称中进行遍历\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.6/dist-packages/selenium/webdriver/chrome/webdriver.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, executable_path, port, options, service_args, desired_capabilities, service_log_path, chrome_options, keep_alive)\u001b[0m\n\u001b[1;32m     71\u001b[0m             \u001b[0mservice_args\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mservice_args\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     72\u001b[0m             
log_path=service_log_path)\n\u001b[0;32m---> 73\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mservice\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     75\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.6/dist-packages/selenium/webdriver/common/service.py\u001b[0m in \u001b[0;36mstart\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     81\u001b[0m                 raise WebDriverException(\n\u001b[1;32m     82\u001b[0m                     \"'%s' executable needs to be in PATH. %s\" % (\n\u001b[0;32m---> 83\u001b[0;31m                         os.path.basename(self.path), self.start_error_message)\n\u001b[0m\u001b[1;32m     84\u001b[0m                 )\n\u001b[1;32m     85\u001b[0m             \u001b[0;32melif\u001b[0m \u001b[0merr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrno\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0merrno\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEACCES\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mWebDriverException\u001b[0m: Message: 'chromedriver' executable needs to be in PATH. 
Please see https://sites.google.com/a/chromium.org/chromedriver/home\n"]}]},{"metadata":{"id":"p9Vl7pQNctVg","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]},{"metadata":{"id":"DFz1Xb8Cctfl","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]},{"metadata":{"id":"F9xPJTyQJGyZ","colab_type":"code","colab":{}},"cell_type":"code","source":["cityll = pd.read_csv(r\"cityll.csv\",index_col=0)\n","city_climate = pd.read_csv(r\"Cityclimate.csv\")"],"execution_count":0,"outputs":[]},{"metadata":{"id":"dhz6_FhiJGya","colab_type":"code","colab":{},"outputId":"f50f147e-542b-4c7a-a078-38c528fcfdc5"},"cell_type":"code","source":["cityll.head() #每个城市对应的经纬度，这些城市是澳大利亚统计局做的那张地图上的城市"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>City</th>\n","      <th>Latitude</th>\n","      <th>Longitude</th>\n","      <th>Latitudedir</th>\n","      <th>Longitudedir</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Adelaide</td>\n","      <td>34.9285°</td>\n","      <td>138.6007°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Albany</td>\n","      <td>35.0275°</td>\n","      <td>117.8840°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Albury</td>\n","      <td>36.0737°</td>\n","      <td>146.9135°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","    <tr>\n","      
<th>3</th>\n","      <td>Wodonga</td>\n","      <td>36.1241°</td>\n","      <td>146.8818°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>AliceSprings</td>\n","      <td>23.6980°</td>\n","      <td>133.8807°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["           City  Latitude  Longitude Latitudedir Longitudedir\n","0      Adelaide  34.9285°  138.6007°          S,            E\n","1        Albany  35.0275°  117.8840°          S,            E\n","2        Albury  36.0737°  146.9135°          S,            E\n","3       Wodonga  36.1241°  146.8818°          S,            E\n","4  AliceSprings  23.6980°  133.8807°          S,            E"]},"metadata":{"tags":[]},"execution_count":59}]},{"metadata":{"id":"2o1m9kBzJGyd","colab_type":"code","colab":{},"outputId":"aa4ce1c3-7217-424c-e134-de1cc1649cbf"},"cell_type":"code","source":["float(cityll.loc[0,\"Latitude\"][:-1])"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["34.9285"]},"metadata":{"tags":[]},"execution_count":60}]},{"metadata":{"id":"sPfNbfgtJGyi","colab_type":"code","colab":{},"outputId":"aff6effc-b16e-413a-e1e3-408847e32ad9"},"cell_type":"code","source":["cityll.loc[:,\"Latitudedir\"].value_counts()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["S,    100\n","Name: Latitudedir, dtype: int64"]},"metadata":{"tags":[]},"execution_count":61}]},{"metadata":{"id":"mrdPifr0JGyk","colab_type":"code","colab":{},"outputId":"f415ae35-dcee-4878-be99-46c9948d4bb9"},"cell_type":"code","source":["city_climate.head() #澳大利亚统计局做的每个城市对应的气候"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","  
  }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>City</th>\n","      <th>Climate</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Adelaide</td>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Albany</td>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Albury</td>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Wodonga</td>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>AliceSprings</td>\n","      <td>Hot dry summer, warm winter</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["           City                      Climate\n","0      Adelaide            Warm temperate   \n","1        Albany            Mild temperate   \n","2        Albury  Hot dry summer, cool winter\n","3       Wodonga  Hot dry summer, cool winter\n","4  AliceSprings  Hot dry summer, warm winter"]},"metadata":{"tags":[]},"execution_count":62}]},{"metadata":{"id":"Lu3MUvqMJGyl","colab_type":"code","colab":{}},"cell_type":"code","source":["#去掉度数符号\n","cityll[\"Latitudenum\"] = cityll[\"Latitude\"].apply(lambda x:float(x[:-1]))\n","cityll[\"Longitudenum\"] = cityll[\"Longitude\"].apply(lambda x:float(x[:-1]))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Dzx8sCCpJGyn","colab_type":"code","colab":{}},"cell_type":"code","source":["#观察一下所有的经纬度方向都是一致的，全部是南纬，东经，因为澳大利亚在南半球，东半球\n","#所以经纬度的方向我们可以舍弃了\n","citylld = 
cityll.iloc[:,[0,5,6]]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"NTOUSPrZJGyp","colab_type":"code","colab":{},"outputId":"267e2a00-688e-4cef-fa9b-824451e1d0f4"},"cell_type":"code","source":["citylld"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>City</th>\n","      <th>Latitudenum</th>\n","      <th>Longitudenum</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Adelaide</td>\n","      <td>34.9285</td>\n","      <td>138.6007</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Albany</td>\n","      <td>35.0275</td>\n","      <td>117.8840</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Albury</td>\n","      <td>36.0737</td>\n","      <td>146.9135</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Wodonga</td>\n","      <td>36.1241</td>\n","      <td>146.8818</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>AliceSprings</td>\n","      <td>23.6980</td>\n","      <td>133.8807</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>Amata</td>\n","      <td>26.1509</td>\n","      <td>131.1467</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>Ballarat</td>\n","      <td>37.5622</td>\n","      <td>143.8503</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>Bathurst</td>\n","      <td>33.4193</td>\n","      <td>149.5775</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>Birdsville</td>\n","      <td>25.8989</td>\n","      
<td>139.3517</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>Borroloola</td>\n","      <td>16.0703</td>\n","      <td>136.3072</td>\n","    </tr>\n","    <tr>\n","      <th>10</th>\n","      <td>Bourke</td>\n","      <td>30.0907</td>\n","      <td>145.9382</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>Brisbane</td>\n","      <td>27.4698</td>\n","      <td>153.0251</td>\n","    </tr>\n","    <tr>\n","      <th>12</th>\n","      <td>BrokenHill</td>\n","      <td>31.9539</td>\n","      <td>141.4539</td>\n","    </tr>\n","    <tr>\n","      <th>13</th>\n","      <td>Broome</td>\n","      <td>17.9614</td>\n","      <td>122.2359</td>\n","    </tr>\n","    <tr>\n","      <th>14</th>\n","      <td>Bunbury</td>\n","      <td>33.3256</td>\n","      <td>115.6396</td>\n","    </tr>\n","    <tr>\n","      <th>15</th>\n","      <td>Burketown</td>\n","      <td>17.8522</td>\n","      <td>139.6332</td>\n","    </tr>\n","    <tr>\n","      <th>16</th>\n","      <td>Burra</td>\n","      <td>33.6800</td>\n","      <td>138.9363</td>\n","    </tr>\n","    <tr>\n","      <th>17</th>\n","      <td>Cairns</td>\n","      <td>16.9186</td>\n","      <td>145.7781</td>\n","    </tr>\n","    <tr>\n","      <th>18</th>\n","      <td>Canberra</td>\n","      <td>35.2809</td>\n","      <td>149.1300</td>\n","    </tr>\n","    <tr>\n","      <th>19</th>\n","      <td>Carnarvon</td>\n","      <td>24.8840</td>\n","      <td>113.6610</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>Ceduna</td>\n","      <td>32.1306</td>\n","      <td>133.6817</td>\n","    </tr>\n","    <tr>\n","      <th>21</th>\n","      <td>Charleville</td>\n","      <td>26.4021</td>\n","      <td>146.2454</td>\n","    </tr>\n","    <tr>\n","      <th>22</th>\n","      <td>CooberPedy</td>\n","      <td>29.0139</td>\n","      <td>134.7533</td>\n","    </tr>\n","    <tr>\n","      <th>23</th>\n","      <td>Cooktown</td>\n","      <td>15.4758</td>\n","      
<td>145.2471</td>\n","    </tr>\n","    <tr>\n","      <th>24</th>\n","      <td>CoffsHarbour</td>\n","      <td>30.2986</td>\n","      <td>153.1094</td>\n","    </tr>\n","    <tr>\n","      <th>25</th>\n","      <td>Dampier</td>\n","      <td>20.6582</td>\n","      <td>116.7151</td>\n","    </tr>\n","    <tr>\n","      <th>26</th>\n","      <td>Darwin</td>\n","      <td>12.4634</td>\n","      <td>130.8456</td>\n","    </tr>\n","    <tr>\n","      <th>27</th>\n","      <td>Derby</td>\n","      <td>17.3179</td>\n","      <td>123.6490</td>\n","    </tr>\n","    <tr>\n","      <th>28</th>\n","      <td>Devonport</td>\n","      <td>41.1771</td>\n","      <td>146.3452</td>\n","    </tr>\n","    <tr>\n","      <th>29</th>\n","      <td>Dubbo</td>\n","      <td>32.2315</td>\n","      <td>148.6330</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>70</th>\n","      <td>Perth</td>\n","      <td>31.9505</td>\n","      <td>115.8605</td>\n","    </tr>\n","    <tr>\n","      <th>71</th>\n","      <td>PerthAirport</td>\n","      <td>31.9440</td>\n","      <td>115.9680</td>\n","    </tr>\n","    <tr>\n","      <th>72</th>\n","      <td>PortHedland</td>\n","      <td>20.3107</td>\n","      <td>118.5878</td>\n","    </tr>\n","    <tr>\n","      <th>73</th>\n","      <td>PortLincoln</td>\n","      <td>34.7240</td>\n","      <td>135.8611</td>\n","    </tr>\n","    <tr>\n","      <th>74</th>\n","      <td>PortMacquarie</td>\n","      <td>31.4333</td>\n","      <td>152.9000</td>\n","    </tr>\n","    <tr>\n","      <th>75</th>\n","      <td>Renmark</td>\n","      <td>34.1743</td>\n","      <td>140.7443</td>\n","    </tr>\n","    <tr>\n","      <th>76</th>\n","      <td>Rockhampton</td>\n","      <td>23.3791</td>\n","      <td>150.5100</td>\n","    </tr>\n","    <tr>\n","      <th>77</th>\n","      <td>Shepparton</td>\n","      <td>36.3833</td>\n","      
<td>145.4000</td>\n","    </tr>\n","    <tr>\n","      <th>78</th>\n","      <td>Southport</td>\n","      <td>27.9738</td>\n","      <td>153.4183</td>\n","    </tr>\n","    <tr>\n","      <th>79</th>\n","      <td>Strahan</td>\n","      <td>42.1500</td>\n","      <td>145.3167</td>\n","    </tr>\n","    <tr>\n","      <th>80</th>\n","      <td>Swansea</td>\n","      <td>33.0850</td>\n","      <td>151.6350</td>\n","    </tr>\n","    <tr>\n","      <th>81</th>\n","      <td>Sydney</td>\n","      <td>33.8688</td>\n","      <td>151.2093</td>\n","    </tr>\n","    <tr>\n","      <th>82</th>\n","      <td>SydneyAirport</td>\n","      <td>33.9399</td>\n","      <td>151.1753</td>\n","    </tr>\n","    <tr>\n","      <th>83</th>\n","      <td>Tamworth</td>\n","      <td>31.0927</td>\n","      <td>150.9320</td>\n","    </tr>\n","    <tr>\n","      <th>84</th>\n","      <td>Taroom</td>\n","      <td>25.6406</td>\n","      <td>149.7983</td>\n","    </tr>\n","    <tr>\n","      <th>85</th>\n","      <td>Telfer</td>\n","      <td>21.6924</td>\n","      <td>122.1478</td>\n","    </tr>\n","    <tr>\n","      <th>86</th>\n","      <td>TennantCreek</td>\n","      <td>19.6484</td>\n","      <td>134.1900</td>\n","    </tr>\n","    <tr>\n","      <th>87</th>\n","      <td>Thargomindah</td>\n","      <td>27.9944</td>\n","      <td>143.8229</td>\n","    </tr>\n","    <tr>\n","      <th>88</th>\n","      <td>Tibooburra</td>\n","      <td>29.4331</td>\n","      <td>142.0108</td>\n","    </tr>\n","    <tr>\n","      <th>89</th>\n","      <td>TimberCreek</td>\n","      <td>15.6432</td>\n","      <td>130.4666</td>\n","    </tr>\n","    <tr>\n","      <th>90</th>\n","      <td>Townsville</td>\n","      <td>19.2590</td>\n","      <td>146.8169</td>\n","    </tr>\n","    <tr>\n","      <th>91</th>\n","      <td>Warburton</td>\n","      <td>26.1353</td>\n","      <td>126.5783</td>\n","    </tr>\n","    <tr>\n","      <th>92</th>\n","      <td>Weipa</td>\n","      <td>12.6493</td>\n","      
<td>141.8470</td>\n","    </tr>\n","    <tr>\n","      <th>93</th>\n","      <td>Whyalla</td>\n","      <td>33.0380</td>\n","      <td>137.5753</td>\n","    </tr>\n","    <tr>\n","      <th>94</th>\n","      <td>Wiluna</td>\n","      <td>26.5950</td>\n","      <td>120.2250</td>\n","    </tr>\n","    <tr>\n","      <th>95</th>\n","      <td>Wollongong</td>\n","      <td>34.4278</td>\n","      <td>150.8931</td>\n","    </tr>\n","    <tr>\n","      <th>96</th>\n","      <td>Wyndham</td>\n","      <td>15.4825</td>\n","      <td>128.1228</td>\n","    </tr>\n","    <tr>\n","      <th>97</th>\n","      <td>Yalgoo</td>\n","      <td>28.3445</td>\n","      <td>116.6851</td>\n","    </tr>\n","    <tr>\n","      <th>98</th>\n","      <td>Yulara</td>\n","      <td>25.2335</td>\n","      <td>130.9849</td>\n","    </tr>\n","    <tr>\n","      <th>99</th>\n","      <td>Uluru</td>\n","      <td>25.3444</td>\n","      <td>131.0369</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>100 rows × 3 columns</p>\n","</div>"],"text/plain":["             City  Latitudenum  Longitudenum\n","0        Adelaide      34.9285      138.6007\n","1          Albany      35.0275      117.8840\n","2          Albury      36.0737      146.9135\n","3         Wodonga      36.1241      146.8818\n","4    AliceSprings      23.6980      133.8807\n","5           Amata      26.1509      131.1467\n","6        Ballarat      37.5622      143.8503\n","7        Bathurst      33.4193      149.5775\n","8      Birdsville      25.8989      139.3517\n","9      Borroloola      16.0703      136.3072\n","10         Bourke      30.0907      145.9382\n","11       Brisbane      27.4698      153.0251\n","12     BrokenHill      31.9539      141.4539\n","13         Broome      17.9614      122.2359\n","14        Bunbury      33.3256      115.6396\n","15      Burketown      17.8522      139.6332\n","16          Burra      33.6800      138.9363\n","17         Cairns      16.9186      145.7781\n","18       Canberra      35.2809    
  149.1300\n","19      Carnarvon      24.8840      113.6610\n","20         Ceduna      32.1306      133.6817\n","21    Charleville      26.4021      146.2454\n","22     CooberPedy      29.0139      134.7533\n","23       Cooktown      15.4758      145.2471\n","24   CoffsHarbour      30.2986      153.1094\n","25        Dampier      20.6582      116.7151\n","26         Darwin      12.4634      130.8456\n","27          Derby      17.3179      123.6490\n","28      Devonport      41.1771      146.3452\n","29          Dubbo      32.2315      148.6330\n","..            ...          ...           ...\n","70          Perth      31.9505      115.8605\n","71   PerthAirport      31.9440      115.9680\n","72    PortHedland      20.3107      118.5878\n","73    PortLincoln      34.7240      135.8611\n","74  PortMacquarie      31.4333      152.9000\n","75        Renmark      34.1743      140.7443\n","76    Rockhampton      23.3791      150.5100\n","77     Shepparton      36.3833      145.4000\n","78      Southport      27.9738      153.4183\n","79        Strahan      42.1500      145.3167\n","80        Swansea      33.0850      151.6350\n","81         Sydney      33.8688      151.2093\n","82  SydneyAirport      33.9399      151.1753\n","83       Tamworth      31.0927      150.9320\n","84         Taroom      25.6406      149.7983\n","85         Telfer      21.6924      122.1478\n","86   TennantCreek      19.6484      134.1900\n","87   Thargomindah      27.9944      143.8229\n","88     Tibooburra      29.4331      142.0108\n","89    TimberCreek      15.6432      130.4666\n","90     Townsville      19.2590      146.8169\n","91      Warburton      26.1353      126.5783\n","92          Weipa      12.6493      141.8470\n","93        Whyalla      33.0380      137.5753\n","94         Wiluna      26.5950      120.2250\n","95     Wollongong      34.4278      150.8931\n","96        Wyndham      15.4825      128.1228\n","97         Yalgoo      28.3445      116.6851\n","98         Yulara      
25.2335      130.9849\n","99          Uluru      25.3444      131.0369\n","\n","[100 rows x 3 columns]"]},"metadata":{"tags":[]},"execution_count":65}]},{"metadata":{"id":"muNCaFDjJGyt","colab_type":"code","colab":{},"outputId":"154c2e5d-d4a1-4b79-81bf-c47fc6173618"},"cell_type":"code","source":["#将city_climate中的气候添加到我们的citylld中\n","citylld[\"climate\"] = city_climate.iloc[:,-1]"],"execution_count":0,"outputs":[{"output_type":"stream","text":["C:\\Python\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n","A value is trying to be set on a copy of a slice from a DataFrame.\n","Try using .loc[row_indexer,col_indexer] = value instead\n","\n","See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n","  \n"],"name":"stderr"}]},{"metadata":{"id":"l9A9vCyCJGyv","colab_type":"code","colab":{},"outputId":"79290b26-e07c-4434-cb7f-fea39cd0bcbd"},"cell_type":"code","source":["citylld.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>City</th>\n","      <th>Latitudenum</th>\n","      <th>Longitudenum</th>\n","      <th>climate</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Adelaide</td>\n","      <td>34.9285</td>\n","      <td>138.6007</td>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Albany</td>\n","      <td>35.0275</td>\n","      <td>117.8840</td>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      
<th>2</th>\n","      <td>Albury</td>\n","      <td>36.0737</td>\n","      <td>146.9135</td>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Wodonga</td>\n","      <td>36.1241</td>\n","      <td>146.8818</td>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>AliceSprings</td>\n","      <td>23.6980</td>\n","      <td>133.8807</td>\n","      <td>Hot dry summer, warm winter</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["           City  Latitudenum  Longitudenum                      climate\n","0      Adelaide      34.9285      138.6007            Warm temperate   \n","1        Albany      35.0275      117.8840            Mild temperate   \n","2        Albury      36.0737      146.9135  Hot dry summer, cool winter\n","3       Wodonga      36.1241      146.8818  Hot dry summer, cool winter\n","4  AliceSprings      23.6980      133.8807  Hot dry summer, warm winter"]},"metadata":{"tags":[]},"execution_count":67}]},{"metadata":{"id":"Q7WA5feWJGyz","colab_type":"code","colab":{},"outputId":"19eebdf5-2256-47c2-db79-01fe5291e9e5"},"cell_type":"code","source":["citylld.loc[:,\"climate\"].value_counts()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Hot dry summer, cool winter          24\n","Warm temperate                       18\n","Hot dry summer, warm winter          18\n","High humidity summer, warm winter    17\n","Cool temperate                        9\n","Mild temperate                        9\n","Warm humid summer, mild winter        5\n","Name: climate, dtype: int64"]},"metadata":{"tags":[]},"execution_count":68}]},
{"metadata":{"id":"8GfSgRUOJGy3","colab_type":"code","colab":{}},"cell_type":"code","source":["# Load the sample-city coordinates via a path relative to the working directory,\n","# so the notebook does not depend on one machine's folder layout.\n","samplecity = pd.read_csv(r\"samplecity.csv\",index_col=0)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Vuig8rI_JGy4","colab_type":"code","colab":{},"outputId":"2d4278e9-038c-4428-bd63-0e25706d3290"},"cell_type":"code","source":["samplecity.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>City</th>\n","      <th>Latitude</th>\n","      <th>Longitude</th>\n","      <th>Latitudedir</th>\n","      <th>Longitudedir</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Canberra</td>\n","      <td>35.2809°</td>\n","      <td>149.1300°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sydney</td>\n","      <td>33.8688°</td>\n","      <td>151.2093°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Perth</td>\n","      <td>31.9505°</td>\n","      <td>115.8605°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Darwin</td>\n","      <td>12.4634°</td>\n","      <td>130.8456°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Hobart</td>\n","      <td>42.8821°</td>\n","      <td>147.3272°</td>\n","      <td>S,</td>\n","      <td>E</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["       City  Latitude  Longitude Latitudedir Longitudedir\n","0  Canberra  35.2809°  149.1300°          S,            E\n","1    Sydney  33.8688°  151.2093° 
         S,            E\n","2     Perth  31.9505°  115.8605°          S,            E\n","3    Darwin  12.4634°  130.8456°          S,            E\n","4    Hobart  42.8821°  147.3272°          S,            E"]},"metadata":{"tags":[]},"execution_count":70}]},{"metadata":{"id":"rI1QuZKTJGy7","colab_type":"code","colab":{}},"cell_type":"code","source":["# Apply the same processing to samplecity: strip the trailing degree sign from the\n","# latitude/longitude strings and drop the direction columns.\n","samplecity[\"Latitudenum\"] = samplecity[\"Latitude\"].apply(lambda x:float(x[:-1]))\n","samplecity[\"Longitudenum\"] = samplecity[\"Longitude\"].apply(lambda x:float(x[:-1]))\n","# .copy() makes samplecityd an independent frame: later cells add columns to it, and\n","# assigning into a bare iloc slice is what raises SettingWithCopyWarning.\n","samplecityd = samplecity.iloc[:,[0,5,6]].copy()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"hbAuwOFwJGy9","colab_type":"code","colab":{},"outputId":"a43bb667-bed1-41b6-8b37-3e8fa78d0cee"},"cell_type":"code","source":["samplecityd.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>City</th>\n","      <th>Latitudenum</th>\n","      <th>Longitudenum</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Canberra</td>\n","      <td>35.2809</td>\n","      <td>149.1300</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sydney</td>\n","      <td>33.8688</td>\n","      <td>151.2093</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Perth</td>\n","      <td>31.9505</td>\n","      <td>115.8605</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Darwin</td>\n","      <td>12.4634</td>\n","      <td>130.8456</td>\n","    </tr>\n","    <tr>\n","      
<th>4</th>\n","      <td>Hobart</td>\n","      <td>42.8821</td>\n","      <td>147.3272</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["       City  Latitudenum  Longitudenum\n","0  Canberra      35.2809      149.1300\n","1    Sydney      33.8688      151.2093\n","2     Perth      31.9505      115.8605\n","3    Darwin      12.4634      130.8456\n","4    Hobart      42.8821      147.3272"]},"metadata":{"tags":[]},"execution_count":72}]},{"metadata":{"id":"tHprFB-LJGzB","colab_type":"code","colab":{}},"cell_type":"code","source":["# First convert the coordinates from degrees to radians with radians()\n","from math import radians, sin, cos, acos\n","citylld.loc[:,\"slat\"] = citylld.iloc[:,1].apply(lambda x : radians(x))\n","citylld.loc[:,\"slon\"] = citylld.iloc[:,2].apply(lambda x : radians(x))\n","samplecityd.loc[:,\"elat\"] = samplecityd.iloc[:,1].apply(lambda x : radians(x))\n","samplecityd.loc[:,\"elon\"] = samplecityd.iloc[:,2].apply(lambda x : radians(x))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"_8j668TaJGzE","colab_type":"code","colab":{},"outputId":"766b86ca-4667-44d5-dd3c-a1bce71f1fb4"},"cell_type":"code","source":["import sys\n","# For every sample city, compute the great-circle distance (spherical law of cosines,\n","# Earth radius 6371.01 km) to every reference city in citylld and keep the nearest one.\n","# The reference coordinates do not change inside the loop, so read them once as arrays.\n","slat = citylld.loc[:,\"slat\"].values\n","slon = citylld.loc[:,\"slon\"].values\n","for i in samplecityd.index:\n","    elat = samplecityd.loc[i,\"elat\"]\n","    elon = samplecityd.loc[i,\"elon\"]\n","    cos_angle = (np.sin(slat)*np.sin(elat) +\n","                 np.cos(slat)*np.cos(elat)*np.cos(slon - elon))\n","    # Rounding can push the cosine slightly outside [-1, 1] when the two points\n","    # (almost) coincide; arccos would then return NaN and corrupt the nearest-city\n","    # lookup, so clip before taking arccos.\n","    dist = 6371.01 * np.arccos(np.clip(cos_angle, -1.0, 1.0))\n","    # argmin is positional; translate it into citylld's index label for .loc\n","    city_index = citylld.index[np.argmin(dist)]\n","    # Attach the nearest reference city and its climate to this sample city\n","    samplecityd.loc[i,\"closest_city\"] = citylld.loc[city_index,\"City\"]\n","    samplecityd.loc[i,\"climate\"] = citylld.loc[city_index,\"climate\"]"],"execution_count":0,"outputs":[{"output_type":"stream","text":["C:\\Python\\lib\\site-packages\\ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in arccos\n","  
\n"],"name":"stderr"}]},{"metadata":{"id":"b7UJL7NfJGzH","colab_type":"code","colab":{},"outputId":"c46bab0b-5649-47b5-bcb2-b9f7d52f01e6"},"cell_type":"code","source":["#查看最后的结果，需要检查城市匹配是否基本正确\n","samplecityd.head(5)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>City</th>\n","      <th>Latitudenum</th>\n","      <th>Longitudenum</th>\n","      <th>elat</th>\n","      <th>elon</th>\n","      <th>closest_city</th>\n","      <th>climate</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Canberra</td>\n","      <td>35.2809</td>\n","      <td>149.1300</td>\n","      <td>0.615768</td>\n","      <td>2.602810</td>\n","      <td>Canberra</td>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sydney</td>\n","      <td>33.8688</td>\n","      <td>151.2093</td>\n","      <td>0.591122</td>\n","      <td>2.639100</td>\n","      <td>Sydney</td>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Perth</td>\n","      <td>31.9505</td>\n","      <td>115.8605</td>\n","      <td>0.557641</td>\n","      <td>2.022147</td>\n","      <td>Perth</td>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Darwin</td>\n","      <td>12.4634</td>\n","      <td>130.8456</td>\n","      <td>0.217527</td>\n","      <td>2.283687</td>\n","      <td>Darwin</td>\n","      <td>High humidity summer, warm winter</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      
<td>Hobart</td>\n","      <td>42.8821</td>\n","      <td>147.3272</td>\n","      <td>0.748434</td>\n","      <td>2.571345</td>\n","      <td>Hobart</td>\n","      <td>Cool temperate</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["       City  Latitudenum  Longitudenum      elat      elon closest_city  \\\n","0  Canberra      35.2809      149.1300  0.615768  2.602810     Canberra   \n","1    Sydney      33.8688      151.2093  0.591122  2.639100       Sydney   \n","2     Perth      31.9505      115.8605  0.557641  2.022147        Perth   \n","3    Darwin      12.4634      130.8456  0.217527  2.283687       Darwin   \n","4    Hobart      42.8821      147.3272  0.748434  2.571345       Hobart   \n","\n","                             climate  \n","0                  Cool temperate     \n","1                  Warm temperate     \n","2                  Warm temperate     \n","3  High humidity summer, warm winter  \n","4                  Cool temperate     "]},"metadata":{"tags":[]},"execution_count":75}]},{"metadata":{"id":"7f9xmDdpJGzK","colab_type":"code","colab":{},"outputId":"bbc13646-4d49-442a-c07e-22e60a6c688b"},"cell_type":"code","source":["#查看气候的分布\n","samplecityd[\"climate\"].value_counts()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Warm temperate                       15\n","Mild temperate                       10\n","Cool temperate                        9\n","Hot dry summer, cool winter           6\n","High humidity summer, warm winter     4\n","Hot dry summer, warm winter           3\n","Warm humid summer, mild winter        2\n","Name: climate, dtype: int64"]},"metadata":{"tags":[]},"execution_count":76}]},{"metadata":{"id":"Y_2IutuYJGzO","colab_type":"code","colab":{}},"cell_type":"code","source":["#确认无误后，取出样本城市所对应的气候，并保存\n","locafinal = 
samplecityd.iloc[:,[0,-1]]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"KdkHg-cKJGzQ","colab_type":"code","colab":{},"outputId":"b45aa0d8-6b56-466d-9ef4-12e2088d4fcf"},"cell_type":"code","source":["locafinal.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>City</th>\n","      <th>climate</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>Canberra</td>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>Sydney</td>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>Perth</td>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Darwin</td>\n","      <td>High humidity summer, warm winter</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Hobart</td>\n","      <td>Cool temperate</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["       City                            climate\n","0  Canberra                  Cool temperate   \n","1    Sydney                  Warm temperate   \n","2     Perth                  Warm temperate   \n","3    Darwin  High humidity summer, warm winter\n","4    Hobart                  Cool temperate   "]},"metadata":{"tags":[]},"execution_count":78}]},{"metadata":{"id":"2pxh-9OdJGzS","colab_type":"code","colab":{}},"cell_type":"code","source":["locafinal.columns = 
[\"Location\",\"Climate\"]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"teL9PvlzJGzT","colab_type":"code","colab":{}},"cell_type":"code","source":["#在这里设定locafinal的索引为地点，是为了之后进行map的匹配\n","locafinal = locafinal.set_index(keys=\"Location\")"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Wz8qYWO5JGzX","colab_type":"code","colab":{},"outputId":"4326249f-765f-4aef-d8f0-98ba5554e99d"},"cell_type":"code","source":["locafinal"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Climate</th>\n","    </tr>\n","    <tr>\n","      <th>Location</th>\n","      <th></th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>Canberra</th>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Sydney</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Perth</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Darwin</th>\n","      <td>High humidity summer, warm winter</td>\n","    </tr>\n","    <tr>\n","      <th>Hobart</th>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Brisbane</th>\n","      <td>Warm humid summer, mild winter</td>\n","    </tr>\n","    <tr>\n","      <th>Adelaide</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Bendigo</th>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Townsville</th>\n","      <td>High humidity summer, warm winter</td>\n","    </tr>\n","    <tr>\n","      <th>AliceSprings</th>\n","      <td>Hot dry 
summer, warm winter</td>\n","    </tr>\n","    <tr>\n","      <th>MountGambier</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Launceston</th>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Ballarat</th>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Albany</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Albury</th>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>PerthAirport</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>MelbourneAirport</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Mildura</th>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>SydneyAirport</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Nuriootpa</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Sale</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Watsonia</th>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>Tuggeranong</th>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Portland</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Woomera</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Cairns</th>\n","      <td>High humidity summer, warm winter</td>\n","    </tr>\n","    <tr>\n","      <th>Cobar</th>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>Wollongong</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>GoldCoast</th>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>WaggaWagga</th>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>NorfolkIsland</th>\n","      <td>Warm 
temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Penrith</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>SalmonGums</th>\n","      <td>Hot dry summer, cool winter</td>\n","    </tr>\n","    <tr>\n","      <th>Newcastle</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>CoffsHarbour</th>\n","      <td>Warm humid summer, mild winter</td>\n","    </tr>\n","    <tr>\n","      <th>Witchcliffe</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Richmond</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Dartmoor</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>NorahHead</th>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>BadgerysCreek</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>MountGinini</th>\n","      <td>Cool temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Moree</th>\n","      <td>Hot dry summer, warm winter</td>\n","    </tr>\n","    <tr>\n","      <th>Walpole</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>PearceRAAF</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Williamtown</th>\n","      <td>Warm temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Melbourne</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Nhil</th>\n","      <td>Mild temperate</td>\n","    </tr>\n","    <tr>\n","      <th>Katherine</th>\n","      <td>High humidity summer, warm winter</td>\n","    </tr>\n","    <tr>\n","      <th>Uluru</th>\n","      <td>Hot dry summer, warm winter</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                            Climate\n","Location                                           \n","Canberra                          Cool temperate   \n","Sydney                            Warm temperate   
\n","Perth                             Warm temperate   \n","Darwin            High humidity summer, warm winter\n","Hobart                            Cool temperate   \n","Brisbane             Warm humid summer, mild winter\n","Adelaide                          Warm temperate   \n","Bendigo                           Cool temperate   \n","Townsville        High humidity summer, warm winter\n","AliceSprings            Hot dry summer, warm winter\n","MountGambier                      Mild temperate   \n","Launceston                        Cool temperate   \n","Ballarat                          Cool temperate   \n","Albany                            Mild temperate   \n","Albury                  Hot dry summer, cool winter\n","PerthAirport                      Warm temperate   \n","MelbourneAirport                  Mild temperate   \n","Mildura                 Hot dry summer, cool winter\n","SydneyAirport                     Warm temperate   \n","Nuriootpa                         Warm temperate   \n","Sale                              Mild temperate   \n","Watsonia                Hot dry summer, cool winter\n","Tuggeranong                       Cool temperate   \n","Portland                          Mild temperate   \n","Woomera                           Warm temperate   \n","Cairns            High humidity summer, warm winter\n","Cobar                   Hot dry summer, cool winter\n","Wollongong                        Warm temperate   \n","GoldCoast                         Cool temperate   \n","WaggaWagga              Hot dry summer, cool winter\n","NorfolkIsland                     Warm temperate   \n","Penrith                           Warm temperate   \n","SalmonGums              Hot dry summer, cool winter\n","Newcastle                         Warm temperate   \n","CoffsHarbour         Warm humid summer, mild winter\n","Witchcliffe                       Warm temperate   \n","Richmond                          Mild temperate   \n","Dartmoor                          
Mild temperate   \n","NorahHead                         Cool temperate   \n","BadgerysCreek                     Warm temperate   \n","MountGinini                       Cool temperate   \n","Moree                   Hot dry summer, warm winter\n","Walpole                           Mild temperate   \n","PearceRAAF                        Warm temperate   \n","Williamtown                       Warm temperate   \n","Melbourne                         Mild temperate   \n","Nhil                              Mild temperate   \n","Katherine         High humidity summer, warm winter\n","Uluru                   Hot dry summer, warm winter"]},"metadata":{"tags":[]},"execution_count":81}]},{"metadata":{"id":"jWkoyvewJGzZ","colab_type":"code","colab":{}},"cell_type":"code","source":["locafinal.to_csv(r\"samplelocation.csv\")"],"execution_count":0,"outputs":[]},{"metadata":{"id":"2khNVuzsJGzc","colab_type":"code","colab":{},"outputId":"ecac614b-cb27-49f3-a90f-79a65251c061"},"cell_type":"code","source":["#是否还记得训练集长什么样呢？\n","Xtrain.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Location</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      
<th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>8</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>12</td>\n","      <td>Tuggeranong</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>4</td>\n","      <td>Albany</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>11</td>\n","      <td>Sale</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","   
   <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4</td>\n","      <td>Mildura</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month     Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0      8    Katherine     17.5     36.0       0.0          8.8       NaN   \n","1     12  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n","2      4       Albany     13.0     22.6       0.0          3.8      10.4   \n","3     11         Sale     13.9     29.8       0.0          5.8       5.1   \n","4      4      Mildura      6.0     23.5       0.0          2.8       8.6   \n","\n","  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n","0         ESE           26.0        NNW    ...             15.0         57.0   \n","1         NNW           33.0         NE    ...             17.0         59.0   \n","2         NaN            NaN         NE    ...             31.0         79.0   \n","3           S           37.0          N    ...             28.0         82.0   \n","4         NNE           24.0          E    ...             
15.0         58.0   \n","\n","   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n","0          NaN       1016.8       1012.2       0.0       NaN     27.5   \n","1         31.0       1020.4       1017.5       NaN       NaN     14.6   \n","2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n","3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n","4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n","\n","   Temp3pm  RainToday  \n","0      NaN         No  \n","1     23.6         No  \n","2     20.8         No  \n","3     27.5         No  \n","4     22.4         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":83}]},{"metadata":{"id":"oKtVCOarJGzd","colab_type":"code","colab":{}},"cell_type":"code","source":["#将location中的内容替换，并且确保匹配进入的气候字符串中不含有逗号，气候两边不含有空格\n","#我们使用re这个模块来消除逗号\n","#re.sub(希望替换的值，希望被替换成的值，要操作的字符串) #去掉逗号\n","#x.strip()是去掉空格的函数\n","#把location替换成气候的是我们的map的映射\n","import re"],"execution_count":0,"outputs":[]},{"metadata":{"id":"zPRnnT5BJGzf","colab_type":"code","colab":{}},"cell_type":"code","source":["#气象站的名字替换成了对应的城市对应的气候\n","Xtrain[\"Location\"] = Xtrain[\"Location\"].map(locafinal.iloc[:,0])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"QZ3iiv1nJGzg","colab_type":"code","colab":{},"outputId":"149ecbfd-a5b8-47d7-99cc-54767b762956"},"cell_type":"code","source":["Xtrain.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Location</th>\n","      
<th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>8</td>\n","      <td>High humidity summer, warm winter</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>12</td>\n","      <td>Cool temperate</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>4</td>\n","      <td>Mild temperate</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      
<td>...</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>11</td>\n","      <td>Mild temperate</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4</td>\n","      <td>Hot dry summer, cool winter</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month                           Location  MinTemp  MaxTemp  Rainfall  \\\n","0      8  High humidity summer, warm winter     17.5     36.0       0.0   \n","1     12                  Cool temperate         9.5     25.0       0.0   \n","2      4                  Mild temperate        13.0     22.6       0.0   \n","3     11                  Mild temperate        13.9     29.8       0.0   \n","4      4        Hot dry summer, cool winter      6.0     23.5       0.0   \n","\n","   Evaporation  Sunshine WindGustDir  WindGustSpeed 
WindDir9am    ...      \\\n","0          8.8       NaN         ESE           26.0        NNW    ...       \n","1          NaN       NaN         NNW           33.0         NE    ...       \n","2          3.8      10.4         NaN            NaN         NE    ...       \n","3          5.8       5.1           S           37.0          N    ...       \n","4          2.8       8.6         NNE           24.0          E    ...       \n","\n","  WindSpeed3pm  Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  \\\n","0         15.0         57.0          NaN       1016.8       1012.2       0.0   \n","1         17.0         59.0         31.0       1020.4       1017.5       NaN   \n","2         31.0         79.0         68.0       1020.3       1015.7       1.0   \n","3         28.0         82.0         44.0       1012.5       1005.9       6.0   \n","4         15.0         58.0         35.0       1019.8       1014.1       2.0   \n","\n","   Cloud3pm  Temp9am  Temp3pm  RainToday  \n","0       NaN     27.5      NaN         No  \n","1       NaN     14.6     23.6         No  \n","2       3.0     17.5     20.8         No  \n","3       6.0     18.5     27.5         No  \n","4       4.0     12.4     22.4         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":86}]},{"metadata":{"id":"kwi9DrtwJGzk","colab_type":"code","colab":{}},"cell_type":"code","source":["#城市的气候中所含的逗号和空格都去掉\n","Xtrain[\"Location\"] = Xtrain[\"Location\"].apply(lambda x:re.sub(\",\",\"\",x.strip()))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"cpIPaziQJGzm","colab_type":"code","colab":{}},"cell_type":"code","source":["Xtest[\"Location\"] = Xtest[\"Location\"].map(locafinal.iloc[:,0]).apply(lambda x:re.sub(\",\",\"\",x.strip()))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"yuXBEaIbJGzn","colab_type":"code","colab":{}},"cell_type":"code","source":["#修改特征内容之后，我们使用新列名“Climate”来替换之前的列名“Location”\n","#注意这个命令一旦执行之后，就再没有列\"Location\"了，使用索引时要特别注意\n","Xtrain = 
Xtrain.rename(columns={\"Location\":\"Climate\"})\n","Xtest = Xtest.rename(columns={\"Location\":\"Climate\"})"],"execution_count":0,"outputs":[]},{"metadata":{"id":"pSFevrtHJGzo","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":253},"outputId":"046114e7-6440-484c-f778-f900705ec299","executionInfo":{"status":"ok","timestamp":1546769412850,"user_tz":-480,"elapsed":762,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.head()"],"execution_count":60,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Climate</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>8</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      
<td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>12</td>\n","      <td>Tuggeranong</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>4</td>\n","      <td>Albany</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>11</td>\n","      <td>Sale</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4</td>\n","      <td>Mildura</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      
<td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month      Climate  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0      8    Katherine     17.5     36.0       0.0          8.8       NaN   \n","1     12  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n","2      4       Albany     13.0     22.6       0.0          3.8      10.4   \n","3     11         Sale     13.9     29.8       0.0          5.8       5.1   \n","4      4      Mildura      6.0     23.5       0.0          2.8       8.6   \n","\n","  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n","0         ESE           26.0        NNW    ...             15.0         57.0   \n","1         NNW           33.0         NE    ...             17.0         59.0   \n","2         NaN            NaN         NE    ...             31.0         79.0   \n","3           S           37.0          N    ...             28.0         82.0   \n","4         NNE           24.0          E    ...             
15.0         58.0   \n","\n","   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n","0          NaN       1016.8       1012.2       0.0       NaN     27.5   \n","1         31.0       1020.4       1017.5       NaN       NaN     14.6   \n","2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n","3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n","4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n","\n","   Temp3pm  RainToday  \n","0      NaN         No  \n","1     23.6         No  \n","2     20.8         No  \n","3     27.5         No  \n","4     22.4         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":60}]},{"metadata":{"id":"NJt3MXT2JGzr","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":253},"outputId":"d2cf7e14-2703-422c-dc30-a761c7d6d06b","executionInfo":{"status":"ok","timestamp":1546769417319,"user_tz":-480,"elapsed":869,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtest.head()"],"execution_count":61,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Climate</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      
<th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>1</td>\n","      <td>NorahHead</td>\n","      <td>22.0</td>\n","      <td>27.8</td>\n","      <td>25.2</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SSW</td>\n","      <td>57.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>37.0</td>\n","      <td>91.0</td>\n","      <td>86.0</td>\n","      <td>1006.6</td>\n","      <td>1008.1</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>26.2</td>\n","      <td>23.1</td>\n","      <td>Yes</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>3</td>\n","      <td>MountGambier</td>\n","      <td>12.0</td>\n","      <td>18.6</td>\n","      <td>2.2</td>\n","      <td>3.0</td>\n","      <td>7.8</td>\n","      <td>SW</td>\n","      <td>52.0</td>\n","      <td>SW</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>88.0</td>\n","      <td>62.0</td>\n","      <td>1020.2</td>\n","      <td>1019.9</td>\n","      <td>8.0</td>\n","      <td>7.0</td>\n","      <td>14.8</td>\n","      <td>17.5</td>\n","      <td>Yes</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>3</td>\n","      <td>MountGinini</td>\n","      <td>9.1</td>\n","      <td>13.3</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>41.0</td>\n","      <td>NaN</td>\n","      <td>...</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>10</td>\n","      <td>Wollongong</td>\n","      
<td>13.1</td>\n","      <td>20.3</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SW</td>\n","      <td>33.0</td>\n","      <td>W</td>\n","      <td>...</td>\n","      <td>24.0</td>\n","      <td>40.0</td>\n","      <td>51.0</td>\n","      <td>1021.3</td>\n","      <td>1019.5</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>16.8</td>\n","      <td>19.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>11</td>\n","      <td>Sale</td>\n","      <td>12.2</td>\n","      <td>20.0</td>\n","      <td>0.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>E</td>\n","      <td>33.0</td>\n","      <td>SW</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>92.0</td>\n","      <td>69.0</td>\n","      <td>1015.6</td>\n","      <td>1013.2</td>\n","      <td>8.0</td>\n","      <td>4.0</td>\n","      <td>13.6</td>\n","      <td>19.0</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month       Climate  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0      1     NorahHead     22.0     27.8      25.2          NaN       NaN   \n","1      3  MountGambier     12.0     18.6       2.2          3.0       7.8   \n","2      3   MountGinini      9.1     13.3       NaN          NaN       NaN   \n","3     10    Wollongong     13.1     20.3       0.0          NaN       NaN   \n","4     11          Sale     12.2     20.0       0.4          NaN       NaN   \n","\n","  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n","0         SSW           57.0          S    ...             37.0         91.0   \n","1          SW           52.0         SW    ...             28.0         88.0   \n","2          NE           41.0        NaN    ...              NaN          NaN   \n","3          SW           33.0          W    ...             
24.0         40.0   \n","4           E           33.0         SW    ...             19.0         92.0   \n","\n","   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n","0         86.0       1006.6       1008.1       NaN       NaN     26.2   \n","1         62.0       1020.2       1019.9       8.0       7.0     14.8   \n","2          NaN          NaN          NaN       NaN       NaN      NaN   \n","3         51.0       1021.3       1019.5       NaN       NaN     16.8   \n","4         69.0       1015.6       1013.2       8.0       4.0     13.6   \n","\n","   Temp3pm  RainToday  \n","0     23.1        Yes  \n","1     17.5        Yes  \n","2      NaN        NaN  \n","3     19.6         No  \n","4     19.0         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":61}]},{"metadata":{"id":"_g6Fm5cVqOFq","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"11584ebf-46dd-439a-8f01-7a955cdf3c8c","executionInfo":{"status":"ok","timestamp":1546769786731,"user_tz":-480,"elapsed":688,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain[\"Climate\"].unique().shape"],"execution_count":67,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(49,)"]},"metadata":{"tags":[]},"execution_count":67}]},{"metadata":{"id":"XDZtmvgLqYEk","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"929e2d2e-cb71-4752-f3d8-afbda535af2c","executionInfo":{"status":"ok","timestamp":1546769778147,"user_tz":-480,"elapsed":731,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain[\"Climate\"].unique().shape"],"execution_count":66,"outputs":[{"output_type":"execute_result","data":{"text/plain":["(49,)"]},"metadata":{"tags":[]},"execution_count":66}]},{"metadata":{"id":"7QcMztJ4o929","colab_type":"text"},"cell_type":"markdown","source":["## 4 
处理分类型变量：缺失值"]},{"metadata":{"id":"nytVBm2TJGzz","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":408},"outputId":"6baff49b-2852-4e43-a5f4-24c2d890eeec","executionInfo":{"status":"ok","timestamp":1546769421940,"user_tz":-480,"elapsed":737,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["#查看缺失值的缺失情况\n","Xtrain.isnull().mean()"],"execution_count":62,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Month            0.000000\n","Climate          0.000000\n","MinTemp          0.004000\n","MaxTemp          0.003143\n","Rainfall         0.009429\n","Evaporation      0.433429\n","Sunshine         0.488571\n","WindGustDir      0.067714\n","WindGustSpeed    0.067714\n","WindDir9am       0.067429\n","WindDir3pm       0.024286\n","WindSpeed9am     0.009714\n","WindSpeed3pm     0.018000\n","Humidity9am      0.011714\n","Humidity3pm      0.026286\n","Pressure9am      0.098857\n","Pressure3pm      0.098857\n","Cloud9am         0.379714\n","Cloud3pm         0.401429\n","Temp9am          0.005429\n","Temp3pm          0.019714\n","RainToday        0.009429\n","dtype: float64"]},"metadata":{"tags":[]},"execution_count":62}]},{"metadata":{"id":"iKrhIawMJGz3","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":476},"outputId":"e71b646f-db2f-4a4d-f0f7-b536b3095e9d","executionInfo":{"status":"ok","timestamp":1546769686387,"user_tz":-480,"elapsed":836,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.info()"],"execution_count":63,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 3500 entries, 0 to 3499\n","Data columns (total 22 columns):\n","Month            3500 non-null int64\n","Climate          3500 non-null object\n","MinTemp          3486 non-null float64\n","MaxTemp          3489 non-null float64\n","Rainfall         3467 non-null 
float64\n","Evaporation      1983 non-null float64\n","Sunshine         1790 non-null float64\n","WindGustDir      3263 non-null object\n","WindGustSpeed    3263 non-null float64\n","WindDir9am       3264 non-null object\n","WindDir3pm       3415 non-null object\n","WindSpeed9am     3466 non-null float64\n","WindSpeed3pm     3437 non-null float64\n","Humidity9am      3459 non-null float64\n","Humidity3pm      3408 non-null float64\n","Pressure9am      3154 non-null float64\n","Pressure3pm      3154 non-null float64\n","Cloud9am         2171 non-null float64\n","Cloud3pm         2095 non-null float64\n","Temp9am          3481 non-null float64\n","Temp3pm          3431 non-null float64\n","RainToday        3467 non-null object\n","dtypes: float64(16), int64(1), object(5)\n","memory usage: 601.6+ KB\n"],"name":"stdout"}]},{"metadata":{"id":"LUqpaPBdJGz6","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":408},"outputId":"878eaeca-86e9-40c3-e4dc-27deb244dbbd","executionInfo":{"status":"ok","timestamp":1546769801628,"user_tz":-480,"elapsed":663,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.dtypes == \"object\""],"execution_count":68,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Month            False\n","Climate           True\n","MinTemp          False\n","MaxTemp          False\n","Rainfall         False\n","Evaporation      False\n","Sunshine         False\n","WindGustDir       True\n","WindGustSpeed    False\n","WindDir9am        True\n","WindDir3pm        True\n","WindSpeed9am     False\n","WindSpeed3pm     False\n","Humidity9am      False\n","Humidity3pm      False\n","Pressure9am      False\n","Pressure3pm      False\n","Cloud9am         False\n","Cloud3pm         False\n","Temp9am          False\n","Temp3pm          False\n","RainToday         True\n","dtype: 
bool"]},"metadata":{"tags":[]},"execution_count":68}]},{"metadata":{"id":"tbiGz84RJG0A","colab_type":"code","colab":{}},"cell_type":"code","source":["# First, collect the categorical features: every column whose dtype is object\n","cate = Xtrain.columns[Xtrain.dtypes == \"object\"].tolist()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"VFtxJSc7JG0B","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"85c2dc4a-e1d9-457b-9272-78ae383c1a01","executionInfo":{"status":"ok","timestamp":1546769808165,"user_tz":-480,"elapsed":664,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["cate"],"execution_count":70,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['Climate', 'WindGustDir', 'WindDir9am', 'WindDir3pm', 'RainToday']"]},"metadata":{"tags":[]},"execution_count":70}]},{"metadata":{"id":"ONdD6glUJG0C","colab_type":"code","colab":{}},"cell_type":"code","source":["# Besides the \"object\"-dtype columns, cloud cover is stored as numbers but is categorical in nature\n","cloud = [\"Cloud9am\",\"Cloud3pm\"]\n","# Append only names not already in cate, so re-running this cell cannot add duplicate columns\n","cate = cate + [col for col in cloud if col not in cate]"],"execution_count":0,"outputs":[]},{"metadata":{"id":"T7r7nD3eJG0G","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":136},"outputId":"56dcbccc-ef32-4e85-f652-aad58d2e75ae","executionInfo":{"status":"ok","timestamp":1546769815693,"user_tz":-480,"elapsed":745,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["cate"],"execution_count":72,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['Climate',\n"," 'WindGustDir',\n"," 'WindDir9am',\n"," 'WindDir3pm',\n"," 'RainToday',\n"," 'Cloud9am',\n"," 'Cloud3pm']"]},"metadata":{"tags":[]},"execution_count":72}]},{"metadata":{"id":"T9gV0oqGJG0I","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":51},"outputId":"1e9b2a3f-f8d2-402d-fe84-66565b185753","executionInfo":{"status":"ok","timestamp":1546769825314,"user_tz":-480,"elapsed":1020,"user":{"displayName":"Sen 
Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["#对于分类型特征，我们使用众数来进行填补\n","from sklearn.impute import SimpleImputer #0.20, conda, pip\n","\n","si = SimpleImputer(missing_values=np.nan,strategy=\"most_frequent\")\n","#注意，我们使用训练集数据来训练我们的填补器，本质是在生成训练集中的众数\n","si.fit(Xtrain.loc[:,cate])"],"execution_count":73,"outputs":[{"output_type":"execute_result","data":{"text/plain":["SimpleImputer(copy=True, fill_value=None, missing_values=nan,\n","       strategy='most_frequent', verbose=0)"]},"metadata":{"tags":[]},"execution_count":73}]},{"metadata":{"id":"b1zMxb6ZJG0I","colab_type":"code","colab":{}},"cell_type":"code","source":["#然后我们用训练集中的众数来同时填补训练集和测试集\n","Xtrain.loc[:,cate] = si.transform(Xtrain.loc[:,cate])\n","Xtest.loc[:,cate] = si.transform(Xtest.loc[:,cate])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"91FqXGUJJG0J","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":253},"outputId":"75e0abea-4f27-44ad-a3a7-2ccfdfc22ccd","executionInfo":{"status":"ok","timestamp":1546769917688,"user_tz":-480,"elapsed":1218,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.head()"],"execution_count":75,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Climate</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      
<th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>8</td>\n","      <td>Katherine</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>ESE</td>\n","      <td>26.0</td>\n","      <td>NNW</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>7.0</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>12</td>\n","      <td>Tuggeranong</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NNW</td>\n","      <td>33.0</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>4</td>\n","      <td>Albany</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>W</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>...</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","      
<td>No</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>11</td>\n","      <td>Sale</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>S</td>\n","      <td>37.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4</td>\n","      <td>Mildura</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>NNE</td>\n","      <td>24.0</td>\n","      <td>E</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month      Climate  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0      8    Katherine     17.5     36.0       0.0          8.8       NaN   \n","1     12  Tuggeranong      9.5     25.0       0.0          NaN       NaN   \n","2      4       Albany     13.0     22.6       0.0          3.8      10.4   \n","3     11         Sale     13.9     29.8       0.0          5.8       5.1   \n","4      4      Mildura      6.0     23.5       0.0          2.8       8.6   \n","\n","  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n","0         ESE           26.0        NNW    ...             15.0         57.0   \n","1         NNW           33.0         NE    ...             17.0         59.0   \n","2           W            NaN         NE    ...             
31.0         79.0   \n","3           S           37.0          N    ...             28.0         82.0   \n","4         NNE           24.0          E    ...             15.0         58.0   \n","\n","   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n","0          NaN       1016.8       1012.2       0.0       7.0     27.5   \n","1         31.0       1020.4       1017.5       7.0       7.0     14.6   \n","2         68.0       1020.3       1015.7       1.0       3.0     17.5   \n","3         44.0       1012.5       1005.9       6.0       6.0     18.5   \n","4         35.0       1019.8       1014.1       2.0       4.0     12.4   \n","\n","   Temp3pm  RainToday  \n","0      NaN         No  \n","1     23.6         No  \n","2     20.8         No  \n","3     27.5         No  \n","4     22.4         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":75}]},{"metadata":{"id":"pfIYbHe1JG0K","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":253},"outputId":"a0de030e-3f8a-4317-8e46-7527d09d8bed","executionInfo":{"status":"ok","timestamp":1546769930955,"user_tz":-480,"elapsed":1550,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtest.head()"],"execution_count":76,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Climate</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      
<th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>1</td>\n","      <td>NorahHead</td>\n","      <td>22.0</td>\n","      <td>27.8</td>\n","      <td>25.2</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SSW</td>\n","      <td>57.0</td>\n","      <td>S</td>\n","      <td>...</td>\n","      <td>37.0</td>\n","      <td>91.0</td>\n","      <td>86.0</td>\n","      <td>1006.6</td>\n","      <td>1008.1</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>26.2</td>\n","      <td>23.1</td>\n","      <td>Yes</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>3</td>\n","      <td>MountGambier</td>\n","      <td>12.0</td>\n","      <td>18.6</td>\n","      <td>2.2</td>\n","      <td>3.0</td>\n","      <td>7.8</td>\n","      <td>SW</td>\n","      <td>52.0</td>\n","      <td>SW</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>88.0</td>\n","      <td>62.0</td>\n","      <td>1020.2</td>\n","      <td>1019.9</td>\n","      <td>8.0</td>\n","      <td>7.0</td>\n","      <td>14.8</td>\n","      <td>17.5</td>\n","      <td>Yes</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>3</td>\n","      <td>MountGinini</td>\n","      <td>9.1</td>\n","      <td>13.3</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NE</td>\n","      <td>41.0</td>\n","      <td>N</td>\n","      <td>...</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>7.0</td>\n","      
<td>7.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>10</td>\n","      <td>Wollongong</td>\n","      <td>13.1</td>\n","      <td>20.3</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>SW</td>\n","      <td>33.0</td>\n","      <td>W</td>\n","      <td>...</td>\n","      <td>24.0</td>\n","      <td>40.0</td>\n","      <td>51.0</td>\n","      <td>1021.3</td>\n","      <td>1019.5</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>16.8</td>\n","      <td>19.6</td>\n","      <td>No</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>11</td>\n","      <td>Sale</td>\n","      <td>12.2</td>\n","      <td>20.0</td>\n","      <td>0.4</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      <td>E</td>\n","      <td>33.0</td>\n","      <td>SW</td>\n","      <td>...</td>\n","      <td>19.0</td>\n","      <td>92.0</td>\n","      <td>69.0</td>\n","      <td>1015.6</td>\n","      <td>1013.2</td>\n","      <td>8.0</td>\n","      <td>4.0</td>\n","      <td>13.6</td>\n","      <td>19.0</td>\n","      <td>No</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month       Climate  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0      1     NorahHead     22.0     27.8      25.2          NaN       NaN   \n","1      3  MountGambier     12.0     18.6       2.2          3.0       7.8   \n","2      3   MountGinini      9.1     13.3       NaN          NaN       NaN   \n","3     10    Wollongong     13.1     20.3       0.0          NaN       NaN   \n","4     11          Sale     12.2     20.0       0.4          NaN       NaN   \n","\n","  WindGustDir  WindGustSpeed WindDir9am    ...     WindSpeed3pm  Humidity9am  \\\n","0         SSW           57.0          S    ...             37.0         91.0   \n","1          SW           52.0         SW    ...             
28.0         88.0   \n","2          NE           41.0          N    ...              NaN          NaN   \n","3          SW           33.0          W    ...             24.0         40.0   \n","4           E           33.0         SW    ...             19.0         92.0   \n","\n","   Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  \\\n","0         86.0       1006.6       1008.1       7.0       7.0     26.2   \n","1         62.0       1020.2       1019.9       8.0       7.0     14.8   \n","2          NaN          NaN          NaN       7.0       7.0      NaN   \n","3         51.0       1021.3       1019.5       7.0       7.0     16.8   \n","4         69.0       1015.6       1013.2       8.0       4.0     13.6   \n","\n","   Temp3pm  RainToday  \n","0     23.1        Yes  \n","1     17.5        Yes  \n","2      NaN         No  \n","3     19.6         No  \n","4     19.0         No  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":76}]},{"metadata":{"id":"H_bnx0yuJG0M","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":153},"outputId":"8e67798a-5f0d-4930-9daa-4f8271ed535c","executionInfo":{"status":"ok","timestamp":1546769934210,"user_tz":-480,"elapsed":1591,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["#查看分类型特征是否依然存在缺失值\n","Xtrain.loc[:,cate].isnull().mean()"],"execution_count":77,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Climate        0.0\n","WindGustDir    0.0\n","WindDir9am     0.0\n","WindDir3pm     0.0\n","RainToday      0.0\n","Cloud9am       0.0\n","Cloud3pm       0.0\n","dtype: 
float64"]},"metadata":{"tags":[]},"execution_count":77}]},{"metadata":{"id":"fdr5NdGSJG0N","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":153},"outputId":"8da85434-3056-46b2-e9e5-d0cb2cfa5d33","executionInfo":{"status":"ok","timestamp":1546769937177,"user_tz":-480,"elapsed":749,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtest.loc[:,cate].isnull().mean()"],"execution_count":78,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Climate        0.0\n","WindGustDir    0.0\n","WindDir9am     0.0\n","WindDir3pm     0.0\n","RainToday      0.0\n","Cloud9am       0.0\n","Cloud3pm       0.0\n","dtype: float64"]},"metadata":{"tags":[]},"execution_count":78}]},{"metadata":{"id":"LnJxbzHWrRoW","colab_type":"text"},"cell_type":"markdown","source":["## 5 处理分类型变量：将分类型变量编码"]},{"metadata":{"id":"1bWQ9-P7JG0O","colab_type":"code","colab":{}},"cell_type":"code","source":["#将所有的分类型变量编码为数字，一个类别是一个数字\n","from sklearn.preprocessing import OrdinalEncoder #只允许二维以上的数据进行输入\n","oe = OrdinalEncoder()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"H9q1f_cPJG0T","colab_type":"code","colab":{}},"cell_type":"code","source":["#利用训练集进行fit\n","oe = oe.fit(Xtrain.loc[:,cate])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"-IVNkSG4JG0X","colab_type":"code","colab":{}},"cell_type":"code","source":["#用训练集的编码结果来编码训练和测试特征矩阵\n","#在这里如果测试特征矩阵报错，就说明测试集中出现了训练集中从未见过的类别\n","Xtrain.loc[:,cate] = oe.transform(Xtrain.loc[:,cate])\n","Xtest.loc[:,cate] = oe.transform(Xtest.loc[:,cate])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"Ngopugn2JG0Y","colab_type":"code","colab":{},"outputId":"cec26cdb-caf4-4658-99b3-9674524d2285"},"cell_type":"code","source":["cate"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['Climate',\n"," 'WindGustDir',\n"," 'WindDir9am',\n"," 'WindDir3pm',\n"," 'RainToday',\n"," 'Cloud9am',\n"," 
'Cloud3pm']"]},"metadata":{"tags":[]},"execution_count":108}]},{"metadata":{"id":"7wBfKvKBJG0a","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":204},"outputId":"656ebf9a-1869-476d-ba93-132bc8369770","executionInfo":{"status":"ok","timestamp":1546770130083,"user_tz":-480,"elapsed":1416,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.loc[:,cate].head()"],"execution_count":82,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Climate</th>\n","      <th>WindGustDir</th>\n","      <th>WindDir9am</th>\n","      <th>WindDir3pm</th>\n","      <th>RainToday</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>16.0</td>\n","      <td>2.0</td>\n","      <td>6.0</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>7.0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>40.0</td>\n","      <td>6.0</td>\n","      <td>4.0</td>\n","      <td>6.0</td>\n","      <td>0.0</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>1.0</td>\n","      <td>13.0</td>\n","      <td>4.0</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>35.0</td>\n","      <td>8.0</td>\n","      <td>3.0</td>\n","      <td>8.0</td>\n","      <td>0.0</td>\n","      
<td>6.0</td>\n","      <td>6.0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>20.0</td>\n","      <td>5.0</td>\n","      <td>0.0</td>\n","      <td>6.0</td>\n","      <td>0.0</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   Climate  WindGustDir  WindDir9am  WindDir3pm  RainToday  Cloud9am  Cloud3pm\n","0     16.0          2.0         6.0         0.0        0.0       0.0       7.0\n","1     40.0          6.0         4.0         6.0        0.0       7.0       7.0\n","2      1.0         13.0         4.0         0.0        0.0       1.0       3.0\n","3     35.0          8.0         3.0         8.0        0.0       6.0       6.0\n","4     20.0          5.0         0.0         6.0        0.0       2.0       4.0"]},"metadata":{"tags":[]},"execution_count":82}]},{"metadata":{"id":"ie9TolssJG0e","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":204},"outputId":"d5ae1bd6-f1ee-4f71-861e-a1de429e0f5e","executionInfo":{"status":"ok","timestamp":1546770148293,"user_tz":-480,"elapsed":980,"user":{"displayName":"Sen Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtest.loc[:,cate].head()"],"execution_count":83,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Climate</th>\n","      <th>WindGustDir</th>\n","      <th>WindDir9am</th>\n","      <th>WindDir3pm</th>\n","      <th>RainToday</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    
<tr>\n","      <th>0</th>\n","      <td>26.0</td>\n","      <td>11.0</td>\n","      <td>8.0</td>\n","      <td>11.0</td>\n","      <td>1.0</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>22.0</td>\n","      <td>12.0</td>\n","      <td>12.0</td>\n","      <td>8.0</td>\n","      <td>1.0</td>\n","      <td>8.0</td>\n","      <td>7.0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>23.0</td>\n","      <td>4.0</td>\n","      <td>3.0</td>\n","      <td>9.0</td>\n","      <td>0.0</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>47.0</td>\n","      <td>12.0</td>\n","      <td>13.0</td>\n","      <td>9.0</td>\n","      <td>0.0</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>35.0</td>\n","      <td>0.0</td>\n","      <td>12.0</td>\n","      <td>0.0</td>\n","      <td>0.0</td>\n","      <td>8.0</td>\n","      <td>4.0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   Climate  WindGustDir  WindDir9am  WindDir3pm  RainToday  Cloud9am  Cloud3pm\n","0     26.0         11.0         8.0        11.0        1.0       7.0       7.0\n","1     22.0         12.0        12.0         8.0        1.0       8.0       7.0\n","2     23.0          4.0         3.0         9.0        0.0       7.0       7.0\n","3     47.0         12.0        13.0         9.0        0.0       7.0       7.0\n","4     35.0          0.0        12.0         0.0        0.0       8.0       4.0"]},"metadata":{"tags":[]},"execution_count":83}]},{"metadata":{"id":"HFMjXfKlJG0j","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":253},"outputId":"faf44af9-59d4-4a88-d11b-1dbf0bf52535","executionInfo":{"status":"ok","timestamp":1546770160121,"user_tz":-480,"elapsed":1279,"user":{"displayName":"Sen 
Yang","photoUrl":"","userId":"00832503676208839570"}}},"cell_type":"code","source":["Xtrain.head()"],"execution_count":84,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Climate</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>8</td>\n","      <td>16.0</td>\n","      <td>17.5</td>\n","      <td>36.0</td>\n","      <td>0.0</td>\n","      <td>8.8</td>\n","      <td>NaN</td>\n","      <td>2.0</td>\n","      <td>26.0</td>\n","      <td>6.0</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>57.0</td>\n","      <td>NaN</td>\n","      <td>1016.8</td>\n","      <td>1012.2</td>\n","      <td>0.0</td>\n","      <td>7.0</td>\n","      <td>27.5</td>\n","      <td>NaN</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>12</td>\n","      <td>40.0</td>\n","      <td>9.5</td>\n","      <td>25.0</td>\n","      <td>0.0</td>\n","      <td>NaN</td>\n","      <td>NaN</td>\n","      
<td>6.0</td>\n","      <td>33.0</td>\n","      <td>4.0</td>\n","      <td>...</td>\n","      <td>17.0</td>\n","      <td>59.0</td>\n","      <td>31.0</td>\n","      <td>1020.4</td>\n","      <td>1017.5</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>14.6</td>\n","      <td>23.6</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>4</td>\n","      <td>1.0</td>\n","      <td>13.0</td>\n","      <td>22.6</td>\n","      <td>0.0</td>\n","      <td>3.8</td>\n","      <td>10.4</td>\n","      <td>13.0</td>\n","      <td>NaN</td>\n","      <td>4.0</td>\n","      <td>...</td>\n","      <td>31.0</td>\n","      <td>79.0</td>\n","      <td>68.0</td>\n","      <td>1020.3</td>\n","      <td>1015.7</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>17.5</td>\n","      <td>20.8</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>11</td>\n","      <td>35.0</td>\n","      <td>13.9</td>\n","      <td>29.8</td>\n","      <td>0.0</td>\n","      <td>5.8</td>\n","      <td>5.1</td>\n","      <td>8.0</td>\n","      <td>37.0</td>\n","      <td>3.0</td>\n","      <td>...</td>\n","      <td>28.0</td>\n","      <td>82.0</td>\n","      <td>44.0</td>\n","      <td>1012.5</td>\n","      <td>1005.9</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>18.5</td>\n","      <td>27.5</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4</td>\n","      <td>20.0</td>\n","      <td>6.0</td>\n","      <td>23.5</td>\n","      <td>0.0</td>\n","      <td>2.8</td>\n","      <td>8.6</td>\n","      <td>5.0</td>\n","      <td>24.0</td>\n","      <td>0.0</td>\n","      <td>...</td>\n","      <td>15.0</td>\n","      <td>58.0</td>\n","      <td>35.0</td>\n","      <td>1019.8</td>\n","      <td>1014.1</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>12.4</td>\n","      <td>22.4</td>\n","      <td>0.0</td>\n","    </tr>\n","  
</tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month  Climate  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0      8     16.0     17.5     36.0       0.0          8.8       NaN   \n","1     12     40.0      9.5     25.0       0.0          NaN       NaN   \n","2      4      1.0     13.0     22.6       0.0          3.8      10.4   \n","3     11     35.0     13.9     29.8       0.0          5.8       5.1   \n","4      4     20.0      6.0     23.5       0.0          2.8       8.6   \n","\n","   WindGustDir  WindGustSpeed  WindDir9am    ...      WindSpeed3pm  \\\n","0          2.0           26.0         6.0    ...              15.0   \n","1          6.0           33.0         4.0    ...              17.0   \n","2         13.0            NaN         4.0    ...              31.0   \n","3          8.0           37.0         3.0    ...              28.0   \n","4          5.0           24.0         0.0    ...              15.0   \n","\n","   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n","0         57.0          NaN       1016.8       1012.2       0.0       7.0   \n","1         59.0         31.0       1020.4       1017.5       7.0       7.0   \n","2         79.0         68.0       1020.3       1015.7       1.0       3.0   \n","3         82.0         44.0       1012.5       1005.9       6.0       6.0   \n","4         58.0         35.0       1019.8       1014.1       2.0       4.0   \n","\n","   Temp9am  Temp3pm  RainToday  \n","0     27.5      NaN        0.0  \n","1     14.6     23.6        0.0  \n","2     17.5     20.8        0.0  \n","3     18.5     27.5        0.0  \n","4     12.4     22.4        0.0  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":84}]},{"metadata":{"id":"Ax4IgNdUJG0o","colab_type":"code","colab":{}},"cell_type":"code","source":["col = 
Xtrain.columns.tolist()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"jE91ZwjIJG0p","colab_type":"code","colab":{},"outputId":"d9c1b4a8-1bb1-4114-bab1-c96054bc7982"},"cell_type":"code","source":["col"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['Month',\n"," 'Climate',\n"," 'MinTemp',\n"," 'MaxTemp',\n"," 'Rainfall',\n"," 'Evaporation',\n"," 'Sunshine',\n"," 'WindGustDir',\n"," 'WindGustSpeed',\n"," 'WindDir9am',\n"," 'WindDir3pm',\n"," 'WindSpeed9am',\n"," 'WindSpeed3pm',\n"," 'Humidity9am',\n"," 'Humidity3pm',\n"," 'Pressure9am',\n"," 'Pressure3pm',\n"," 'Cloud9am',\n"," 'Cloud3pm',\n"," 'Temp9am',\n"," 'Temp3pm',\n"," 'RainToday']"]},"metadata":{"tags":[]},"execution_count":113}]},{"metadata":{"id":"EEIISnhkJG0q","colab_type":"code","colab":{},"outputId":"72de1e23-8888-414b-cfa7-d1a7eae9383c"},"cell_type":"code","source":["cate"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['Climate',\n"," 'WindGustDir',\n"," 'WindDir9am',\n"," 'WindDir3pm',\n"," 'RainToday',\n"," 'Cloud9am',\n"," 'Cloud3pm']"]},"metadata":{"tags":[]},"execution_count":114}]},{"metadata":{"id":"nmU-wcDWJG0s","colab_type":"code","colab":{}},"cell_type":"code","source":["for i in cate:\n","    col.remove(i)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"t8DoKWJ5JG0t","colab_type":"code","colab":{},"outputId":"9f53bb27-2c5c-4c00-add1-167a5e8e5562"},"cell_type":"code","source":["col"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['Month',\n"," 'MinTemp',\n"," 'MaxTemp',\n"," 'Rainfall',\n"," 'Evaporation',\n"," 'Sunshine',\n"," 'WindGustSpeed',\n"," 'WindSpeed9am',\n"," 'WindSpeed3pm',\n"," 'Humidity9am',\n"," 'Humidity3pm',\n"," 'Pressure9am',\n"," 'Pressure3pm',\n"," 'Temp9am',\n"," 
'Temp3pm']"]},"metadata":{"tags":[]},"execution_count":116}]},{"metadata":{"id":"s184R31aJG0u","colab_type":"code","colab":{}},"cell_type":"code","source":["#实例化模型，填补策略为\"mean\"表示均值\n","impmean = SimpleImputer(missing_values=np.nan,strategy = \"mean\")\n","#用训练集来fit模型\n","impmean = impmean.fit(Xtrain.loc[:,col])\n","#分别在训练集和测试集上进行均值填补\n","Xtrain.loc[:,col] = impmean.transform(Xtrain.loc[:,col])\n","Xtest.loc[:,col] = impmean.transform(Xtest.loc[:,col])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"siVKmqDmJG0v","colab_type":"code","colab":{},"outputId":"3e2e574d-18fe-4a8f-918b-6feec3d4a0ed"},"cell_type":"code","source":["Xtrain.isnull().mean()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Month            0.0\n","Climate          0.0\n","MinTemp          0.0\n","MaxTemp          0.0\n","Rainfall         0.0\n","Evaporation      0.0\n","Sunshine         0.0\n","WindGustDir      0.0\n","WindGustSpeed    0.0\n","WindDir9am       0.0\n","WindDir3pm       0.0\n","WindSpeed9am     0.0\n","WindSpeed3pm     0.0\n","Humidity9am      0.0\n","Humidity3pm      0.0\n","Pressure9am      0.0\n","Pressure3pm      0.0\n","Cloud9am         0.0\n","Cloud3pm         0.0\n","Temp9am          0.0\n","Temp3pm          0.0\n","RainToday        0.0\n","dtype: float64"]},"metadata":{"tags":[]},"execution_count":118}]},{"metadata":{"id":"c2IwClRTJG0w","colab_type":"code","colab":{},"outputId":"7aab4cc2-4cbb-4589-e6b8-aab49e0db9cf"},"cell_type":"code","source":["Xtest.isnull().mean()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Month            0.0\n","Climate          0.0\n","MinTemp          0.0\n","MaxTemp          0.0\n","Rainfall         0.0\n","Evaporation      0.0\n","Sunshine         0.0\n","WindGustDir      0.0\n","WindGustSpeed    0.0\n","WindDir9am       0.0\n","WindDir3pm       0.0\n","WindSpeed9am     0.0\n","WindSpeed3pm     0.0\n","Humidity9am      0.0\n","Humidity3pm      
0.0\n","Pressure9am      0.0\n","Pressure3pm      0.0\n","Cloud9am         0.0\n","Cloud3pm         0.0\n","Temp9am          0.0\n","Temp3pm          0.0\n","RainToday        0.0\n","dtype: float64"]},"metadata":{"tags":[]},"execution_count":119}]},{"metadata":{"id":"Cjn3bUvfJG0z","colab_type":"code","colab":{}},"cell_type":"code","source":["col.remove(\"Month\")"],"execution_count":0,"outputs":[]},{"metadata":{"id":"-Dapa-ovJG00","colab_type":"code","colab":{},"outputId":"e99cc86d-9703-4dc1-8cd1-de4c22425a16"},"cell_type":"code","source":["col"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['MinTemp',\n"," 'MaxTemp',\n"," 'Rainfall',\n"," 'Evaporation',\n"," 'Sunshine',\n"," 'WindGustSpeed',\n"," 'WindSpeed9am',\n"," 'WindSpeed3pm',\n"," 'Humidity9am',\n"," 'Humidity3pm',\n"," 'Pressure9am',\n"," 'Pressure3pm',\n"," 'Temp9am',\n"," 'Temp3pm']"]},"metadata":{"tags":[]},"execution_count":121}]},{"metadata":{"id":"4PYlpGgcJG01","colab_type":"code","colab":{}},"cell_type":"code","source":["from sklearn.preprocessing import StandardScaler #数据转换为均值为0，方差为1的数据\n","#标准化不改变数据的分布，不会把数据变成正态分布的"],"execution_count":0,"outputs":[]},{"metadata":{"id":"u57MdTmbJG02","colab_type":"code","colab":{}},"cell_type":"code","source":["ss = StandardScaler()\n","ss = ss.fit(Xtrain.loc[:,col])\n","Xtrain.loc[:,col] = ss.transform(Xtrain.loc[:,col])\n","Xtest.loc[:,col] = ss.transform(Xtest.loc[:,col])"],"execution_count":0,"outputs":[]},{"metadata":{"id":"HBCPxPEmJG03","colab_type":"code","colab":{},"outputId":"fd4afd72-6bfa-41d6-93b1-94ab7299ecbf"},"cell_type":"code","source":["Xtrain.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    
}\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Climate</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>8.0</td>\n","      <td>1.0</td>\n","      <td>0.826375</td>\n","      <td>1.774044</td>\n","      <td>-0.314379</td>\n","      <td>0.964367</td>\n","      <td>0.000000</td>\n","      <td>2.0</td>\n","      <td>-1.085893e+00</td>\n","      <td>6.0</td>\n","      <td>...</td>\n","      <td>-0.416443</td>\n","      <td>-0.646283</td>\n","      <td>0.000000</td>\n","      <td>-0.122589</td>\n","      <td>-0.453507</td>\n","      <td>0.0</td>\n","      <td>7.0</td>\n","      <td>1.612270</td>\n","      <td>0.000000</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>12.0</td>\n","      <td>0.0</td>\n","      <td>-0.427048</td>\n","      <td>0.244031</td>\n","      <td>-0.314379</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>6.0</td>\n","      <td>-5.373993e-01</td>\n","      <td>4.0</td>\n","      <td>...</td>\n","      <td>-0.182051</td>\n","      <td>-0.539186</td>\n","      <td>-1.011310</td>\n","      <td>0.414254</td>\n","      <td>0.340522</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>-0.366608</td>\n","      <td>0.270238</td>\n","      <td>0.0</td>\n","    
</tr>\n","    <tr>\n","      <th>2</th>\n","      <td>4.0</td>\n","      <td>4.0</td>\n","      <td>0.121324</td>\n","      <td>-0.089790</td>\n","      <td>-0.314379</td>\n","      <td>-0.551534</td>\n","      <td>1.062619</td>\n","      <td>13.0</td>\n","      <td>-1.113509e-15</td>\n","      <td>4.0</td>\n","      <td>...</td>\n","      <td>1.458692</td>\n","      <td>0.531786</td>\n","      <td>0.800547</td>\n","      <td>0.399342</td>\n","      <td>0.070852</td>\n","      <td>1.0</td>\n","      <td>3.0</td>\n","      <td>0.078256</td>\n","      <td>-0.132031</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>11.0</td>\n","      <td>4.0</td>\n","      <td>0.262334</td>\n","      <td>0.911673</td>\n","      <td>-0.314379</td>\n","      <td>0.054826</td>\n","      <td>-0.885225</td>\n","      <td>8.0</td>\n","      <td>-2.239744e-01</td>\n","      <td>3.0</td>\n","      <td>...</td>\n","      <td>1.107105</td>\n","      <td>0.692432</td>\n","      <td>-0.374711</td>\n","      <td>-0.763819</td>\n","      <td>-1.397352</td>\n","      <td>6.0</td>\n","      <td>6.0</td>\n","      <td>0.231658</td>\n","      <td>0.830540</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>4.0</td>\n","      <td>2.0</td>\n","      <td>-0.975421</td>\n","      <td>0.035393</td>\n","      <td>-0.314379</td>\n","      <td>-0.854715</td>\n","      <td>0.401087</td>\n","      <td>5.0</td>\n","      <td>-1.242605e+00</td>\n","      <td>0.0</td>\n","      <td>...</td>\n","      <td>-0.416443</td>\n","      <td>-0.592734</td>\n","      <td>-0.815433</td>\n","      <td>0.324780</td>\n","      <td>-0.168855</td>\n","      <td>2.0</td>\n","      <td>4.0</td>\n","      <td>-0.704091</td>\n","      <td>0.097837</td>\n","      <td>0.0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month  Climate   MinTemp   MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0    
8.0      1.0  0.826375  1.774044 -0.314379     0.964367  0.000000   \n","1   12.0      0.0 -0.427048  0.244031 -0.314379     0.000000  0.000000   \n","2    4.0      4.0  0.121324 -0.089790 -0.314379    -0.551534  1.062619   \n","3   11.0      4.0  0.262334  0.911673 -0.314379     0.054826 -0.885225   \n","4    4.0      2.0 -0.975421  0.035393 -0.314379    -0.854715  0.401087   \n","\n","   WindGustDir  WindGustSpeed  WindDir9am    ...      WindSpeed3pm  \\\n","0          2.0  -1.085893e+00         6.0    ...         -0.416443   \n","1          6.0  -5.373993e-01         4.0    ...         -0.182051   \n","2         13.0  -1.113509e-15         4.0    ...          1.458692   \n","3          8.0  -2.239744e-01         3.0    ...          1.107105   \n","4          5.0  -1.242605e+00         0.0    ...         -0.416443   \n","\n","   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n","0    -0.646283     0.000000    -0.122589    -0.453507       0.0       7.0   \n","1    -0.539186    -1.011310     0.414254     0.340522       7.0       7.0   \n","2     0.531786     0.800547     0.399342     0.070852       1.0       3.0   \n","3     0.692432    -0.374711    -0.763819    -1.397352       6.0       6.0   \n","4    -0.592734    -0.815433     0.324780    -0.168855       2.0       4.0   \n","\n","    Temp9am   Temp3pm  RainToday  \n","0  1.612270  0.000000        0.0  \n","1 -0.366608  0.270238        0.0  \n","2  0.078256 -0.132031        0.0  \n","3  0.231658  0.830540        0.0  \n","4 -0.704091  0.097837        0.0  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":124}]},{"metadata":{"id":"7JZgn6A1JG05","colab_type":"code","colab":{},"outputId":"5f1eb9b4-c5ce-40a7-ac59-e5be86bc9734"},"cell_type":"code","source":["Xtest.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: 
middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Month</th>\n","      <th>Climate</th>\n","      <th>MinTemp</th>\n","      <th>MaxTemp</th>\n","      <th>Rainfall</th>\n","      <th>Evaporation</th>\n","      <th>Sunshine</th>\n","      <th>WindGustDir</th>\n","      <th>WindGustSpeed</th>\n","      <th>WindDir9am</th>\n","      <th>...</th>\n","      <th>WindSpeed3pm</th>\n","      <th>Humidity9am</th>\n","      <th>Humidity3pm</th>\n","      <th>Pressure9am</th>\n","      <th>Pressure3pm</th>\n","      <th>Cloud9am</th>\n","      <th>Cloud3pm</th>\n","      <th>Temp9am</th>\n","      <th>Temp3pm</th>\n","      <th>RainToday</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>1.0</td>\n","      <td>0.0</td>\n","      <td>1.531425</td>\n","      <td>0.633489</td>\n","      <td>2.871067</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>11.0</td>\n","      <td>1.343150</td>\n","      <td>8.0</td>\n","      <td>...</td>\n","      <td>2.161868e+00</td>\n","      <td>1.174369</td>\n","      <td>1.681991</td>\n","      <td>-1.643646</td>\n","      <td>-1.067755</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>1.412848</td>\n","      <td>0.198404</td>\n","      <td>1.0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>3.0</td>\n","      <td>4.0</td>\n","      <td>-0.035354</td>\n","      <td>-0.646158</td>\n","      <td>-0.036285</td>\n","      <td>-0.794079</td>\n","      <td>0.107073</td>\n","      <td>12.0</td>\n","      <td>0.951369</td>\n","      <td>12.0</td>\n","      <td>...</td>\n","      <td>1.107105e+00</td>\n","      <td>1.013723</td>\n","      <td>0.506733</td>\n","      
<td>0.384430</td>\n","      <td>0.700082</td>\n","      <td>8.0</td>\n","      <td>7.0</td>\n","      <td>-0.335927</td>\n","      <td>-0.606132</td>\n","      <td>1.0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>3.0</td>\n","      <td>0.0</td>\n","      <td>-0.489720</td>\n","      <td>-1.383346</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>4.0</td>\n","      <td>0.089450</td>\n","      <td>3.0</td>\n","      <td>...</td>\n","      <td>-4.163637e-16</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>10.0</td>\n","      <td>6.0</td>\n","      <td>0.136992</td>\n","      <td>-0.409702</td>\n","      <td>-0.314379</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>12.0</td>\n","      <td>-0.537399</td>\n","      <td>13.0</td>\n","      <td>...</td>\n","      <td>6.383207e-01</td>\n","      <td>-1.556609</td>\n","      <td>-0.031928</td>\n","      <td>0.548465</td>\n","      <td>0.640155</td>\n","      <td>7.0</td>\n","      <td>7.0</td>\n","      <td>-0.029125</td>\n","      <td>-0.304431</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>11.0</td>\n","      <td>4.0</td>\n","      <td>-0.004018</td>\n","      <td>-0.451429</td>\n","      <td>-0.263817</td>\n","      <td>0.000000</td>\n","      <td>0.000000</td>\n","      <td>0.0</td>\n","      <td>-0.537399</td>\n","      <td>12.0</td>\n","      <td>...</td>\n","      <td>5.234093e-02</td>\n","      <td>1.227917</td>\n","      <td>0.849516</td>\n","      <td>-0.301537</td>\n","      <td>-0.303690</td>\n","      <td>8.0</td>\n","      <td>4.0</td>\n","      <td>-0.520009</td>\n","      <td>-0.390632</td>\n","      
<td>0.0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>5 rows × 22 columns</p>\n","</div>"],"text/plain":["   Month  Climate   MinTemp   MaxTemp  Rainfall  Evaporation  Sunshine  \\\n","0    1.0      0.0  1.531425  0.633489  2.871067     0.000000  0.000000   \n","1    3.0      4.0 -0.035354 -0.646158 -0.036285    -0.794079  0.107073   \n","2    3.0      0.0 -0.489720 -1.383346  0.000000     0.000000  0.000000   \n","3   10.0      6.0  0.136992 -0.409702 -0.314379     0.000000  0.000000   \n","4   11.0      4.0 -0.004018 -0.451429 -0.263817     0.000000  0.000000   \n","\n","   WindGustDir  WindGustSpeed  WindDir9am    ...      WindSpeed3pm  \\\n","0         11.0       1.343150         8.0    ...      2.161868e+00   \n","1         12.0       0.951369        12.0    ...      1.107105e+00   \n","2          4.0       0.089450         3.0    ...     -4.163637e-16   \n","3         12.0      -0.537399        13.0    ...      6.383207e-01   \n","4          0.0      -0.537399        12.0    ...      
5.234093e-02   \n","\n","   Humidity9am  Humidity3pm  Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  \\\n","0     1.174369     1.681991    -1.643646    -1.067755       7.0       7.0   \n","1     1.013723     0.506733     0.384430     0.700082       8.0       7.0   \n","2     0.000000     0.000000     0.000000     0.000000       7.0       7.0   \n","3    -1.556609    -0.031928     0.548465     0.640155       7.0       7.0   \n","4     1.227917     0.849516    -0.301537    -0.303690       8.0       4.0   \n","\n","    Temp9am   Temp3pm  RainToday  \n","0  1.412848  0.198404        1.0  \n","1 -0.335927 -0.606132        1.0  \n","2  0.000000  0.000000        0.0  \n","3 -0.029125 -0.304431        0.0  \n","4 -0.520009 -0.390632        0.0  \n","\n","[5 rows x 22 columns]"]},"metadata":{"tags":[]},"execution_count":125}]},{"metadata":{"id":"_Of6Ks5jJG09","colab_type":"code","colab":{},"outputId":"5164e4c9-8d57-4b93-fa81-84f818d4d140"},"cell_type":"code","source":["Ytrain.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>0</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>1</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["   0\n","0  0\n","1  0\n","2  0\n","3  1\n","4  
0"]},"metadata":{"tags":[]},"execution_count":126}]},{"metadata":{"id":"liBadk3pJG0_","colab_type":"code","colab":{}},"cell_type":"code","source":["from time import time #used to monitor how long each model takes to run\n","import datetime\n","from sklearn.svm import SVC\n","from sklearn.model_selection import cross_val_score\n","from sklearn.metrics import roc_auc_score, recall_score"],"execution_count":0,"outputs":[]},{"metadata":{"id":"8GvwvkYBJG1B","colab_type":"code","colab":{}},"cell_type":"code","source":["#flatten the label frames to 1-D arrays (first column); wrapping in pd.DataFrame lets this cell be re-run once the names already hold arrays\n","Ytrain = pd.DataFrame(Ytrain).iloc[:,0].values\n","Ytest = pd.DataFrame(Ytest).iloc[:,0].values"],"execution_count":0,"outputs":[]},{"metadata":{"id":"8-bEC4h7JG1C","colab_type":"code","colab":{},"outputId":"a24f70c8-d65c-482d-dcf1-6dc76baa08ba"},"cell_type":"code","source":["#the model is a support vector classifier (SVC); start by comparing the kernel functions against each other\n","#for every kernel report accuracy, recall and AUC side by side\n","times = time() #SVMs are computationally expensive, so track the running time; started once before the loop, so the printed times are cumulative\n","\n","for kernel in [\"linear\",\"poly\",\"rbf\",\"sigmoid\"]:\n","    clf = SVC(kernel = kernel\n","              ,gamma=\"auto\"\n","              ,degree = 1\n","              ,cache_size = 5000\n","             ).fit(Xtrain, Ytrain)\n","    result = clf.predict(Xtest)\n","    score = clf.score(Xtest,Ytest)\n","    recall = recall_score(Ytest, result)\n","    auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n","    print(\"%s 's testing accuracy %f, recall is %f', auc is %f\" % (kernel,score,recall,auc))\n","    print(datetime.datetime.fromtimestamp(time()-times, tz=datetime.timezone.utc).strftime(\"%M:%S:%f\")) #tz=UTC: a local offset such as +05:30 would otherwise shift %M"],"execution_count":0,"outputs":[{"output_type":"stream","text":["linear 's testing accuracy 0.844000, recall is 0.469388', auc is 0.869029\n","00:06:621084\n","poly 's testing accuracy 0.840667, recall is 0.457726', auc is 0.868157\n","00:07:322248\n","rbf 's testing accuracy 0.813333, recall is 0.306122', auc is 0.814873\n","00:09:621109\n","sigmoid 's testing accuracy 0.655333, recall is 0.154519', auc is 
0.437308\n","00:10:463819\n"],"name":"stdout"}]},{"metadata":{"id":"LZj5yT7RJG1D","colab_type":"code","colab":{},"outputId":"5d73db7c-a404-44c5-a5ff-63423fd13424"},"cell_type":"code","source":["times = time() #started once before the loop, so the printed times are cumulative\n","for kernel in [\"linear\",\"poly\",\"rbf\",\"sigmoid\"]:\n","    clf = SVC(kernel = kernel\n","              ,gamma=\"auto\"\n","              ,degree = 1\n","              ,cache_size = 5000\n","              ,class_weight = \"balanced\"\n","             ).fit(Xtrain, Ytrain)\n","    result = clf.predict(Xtest)\n","    score = clf.score(Xtest,Ytest)\n","    recall = recall_score(Ytest, result)\n","    auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n","    print(\"%s 's testing accuracy %f, recall is %f', auc is %f\" % (kernel,score,recall,auc))\n","    print(datetime.datetime.fromtimestamp(time()-times, tz=datetime.timezone.utc).strftime(\"%M:%S:%f\")) #tz=UTC: a local offset such as +05:30 would otherwise shift %M"],"execution_count":0,"outputs":[{"output_type":"stream","text":["linear 's testing accuracy 0.796667, recall is 0.775510', auc is 0.870062\n","00:07:740303\n","poly 's testing accuracy 0.793333, recall is 0.763848', auc is 0.871448\n","00:09:007915\n","rbf 's testing accuracy 0.803333, recall is 0.600583', auc is 0.819713\n","00:11:986949\n","sigmoid 's testing accuracy 0.562000, recall is 0.282799', auc is 0.437119\n","00:14:324700\n"],"name":"stdout"}]},{"metadata":{"id":"mhzPcMHXJG1F","colab_type":"code","colab":{},"outputId":"638d92fe-5d31-4665-c3a1-d202123e0ab7"},"cell_type":"code","source":["times = time()\n","clf = SVC(kernel = \"linear\"\n","          ,gamma=\"auto\"\n","          ,cache_size = 5000\n","          ,class_weight = {1:15} #note: this weights class 1 at 15; class 0 is not listed and keeps the implicit weight 1 (a 1:15 ratio)\n","         ).fit(Xtrain, Ytrain)\n","result = clf.predict(Xtest)\n","score = clf.score(Xtest,Ytest)\n","recall = recall_score(Ytest, result)\n","auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n","print(\"testing accuracy %f, recall is %f', auc is %f\" 
%(score,recall,auc))\n","print(datetime.datetime.fromtimestamp(time()-times, tz=datetime.timezone.utc).strftime(\"%M:%S:%f\")) #tz=UTC: a local offset such as +05:30 would otherwise shift %M"],"execution_count":0,"outputs":[{"output_type":"stream","text":["testing accuracy 0.548000, recall is 0.970845', auc is 0.867172\n","00:12:731999\n"],"name":"stdout"}]},{"metadata":{"id":"PE1Ky8DFJG1I","colab_type":"code","colab":{}},"cell_type":"code","source":["valuec = pd.Series(Ytest).value_counts()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"7r8yYPLiJG1I","colab_type":"code","colab":{},"outputId":"628cdacd-ff11-4cd0-81d8-ebd5a6afcd79"},"cell_type":"code","source":["valuec"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0    1157\n","1     343\n","dtype: int64"]},"metadata":{"tags":[]},"execution_count":134}]},{"metadata":{"id":"Juf7NcAHJG1K","colab_type":"code","colab":{},"outputId":"e142768f-cdd2-4da4-d89d-5f19b6e42e7a"},"cell_type":"code","source":["valuec[0]/valuec.sum()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.7713333333333333"]},"metadata":{"tags":[]},"execution_count":135}]},{"metadata":{"id":"8xs_Yz3LJG1M","colab_type":"code","colab":{}},"cell_type":"code","source":["#inspect the model's specificity\n","from sklearn.metrics import confusion_matrix as CM"],"execution_count":0,"outputs":[]},{"metadata":{"id":"f8_z71PBJG1P","colab_type":"code","colab":{}},"cell_type":"code","source":["clf = SVC(kernel = \"linear\"\n","          ,gamma=\"auto\"\n","          ,cache_size = 5000\n","         ).fit(Xtrain, Ytrain)\n","result = clf.predict(Xtest)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"G_uLVUNZJG1R","colab_type":"code","colab":{}},"cell_type":"code","source":["cm = CM(Ytest,result,labels=(1,0))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"cR8BM9zhJG1S","colab_type":"code","colab":{},"outputId":"4a816138-60c0-4b82-d233-c121898f9009"},"cell_type":"code","source":["cm"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[ 
161,  182],\n","       [  52, 1105]], dtype=int64)"]},"metadata":{"tags":[]},"execution_count":139}]},{"metadata":{"id":"BtVDaq6MJG1T","colab_type":"code","colab":{}},"cell_type":"code","source":["specificity = cm[1,1]/cm[1,:].sum()"],"execution_count":0,"outputs":[]},{"metadata":{"id":"uiCjt_T_JG1V","colab_type":"code","colab":{},"outputId":"409631a9-1043-44d3-98d3-664d9db832da"},"cell_type":"code","source":["specificity #几乎所有的0都被判断正确了，还有不少1也被判断正确了"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.9550561797752809"]},"metadata":{"tags":[]},"execution_count":141}]},{"metadata":{"id":"i1hwFOywJG1Z","colab_type":"code","colab":{}},"cell_type":"code","source":["irange = np.linspace(0.01,0.05,10)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"MyTbGAXZJG1a","colab_type":"code","colab":{},"outputId":"adf7730a-49ac-444c-e916-9b14345248a1"},"cell_type":"code","source":["irange"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([0.01      , 0.01444444, 0.01888889, 0.02333333, 0.02777778,\n","       0.03222222, 0.03666667, 0.04111111, 0.04555556, 0.05      ])"]},"metadata":{"tags":[]},"execution_count":143}]},{"metadata":{"id":"KCucj4CqJG1c","colab_type":"code","colab":{},"outputId":"f6fd75d3-ad28-4a2e-e348-de97de374383"},"cell_type":"code","source":["for i in irange:\n","    times = time()\n","    clf = SVC(kernel = \"linear\"\n","              ,gamma=\"auto\"\n","              ,cache_size = 5000\n","              ,class_weight = {1:1+i}\n","             ).fit(Xtrain, Ytrain)\n","    result = clf.predict(Xtest)\n","    score = clf.score(Xtest,Ytest)\n","    recall = recall_score(Ytest, result)\n","    auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n","    print(\"under ratio 1:%f testing accuracy %f, recall is %f', auc is %f\" %(1+i,score,recall,auc))\n","    
print(datetime.datetime.fromtimestamp(time()-times, tz=datetime.timezone.utc).strftime(\"%M:%S:%f\")) #tz=UTC: a local offset such as +05:30 would otherwise shift %M"],"execution_count":0,"outputs":[{"output_type":"stream","text":["under ratio 1:1.010000 testing accuracy 0.844667, recall is 0.475219', auc is 0.869157\n","00:06:717088\n","under ratio 1:1.014444 testing accuracy 0.844667, recall is 0.478134', auc is 0.869185\n","00:06:542548\n","under ratio 1:1.018889 testing accuracy 0.844667, recall is 0.478134', auc is 0.869198\n","00:05:809458\n","under ratio 1:1.023333 testing accuracy 0.845333, recall is 0.481050', auc is 0.869175\n","00:07:305287\n","under ratio 1:1.027778 testing accuracy 0.844000, recall is 0.481050', auc is 0.869394\n","00:06:395937\n","under ratio 1:1.032222 testing accuracy 0.844000, recall is 0.481050', auc is 0.869528\n","00:06:333034\n","under ratio 1:1.036667 testing accuracy 0.844000, recall is 0.481050', auc is 0.869659\n","00:06:923525\n","under ratio 1:1.041111 testing accuracy 0.844667, recall is 0.483965', auc is 0.869629\n","00:08:993914\n","under ratio 1:1.045556 testing accuracy 0.844667, recall is 0.483965', auc is 0.869712\n","00:06:514583\n","under ratio 1:1.050000 testing accuracy 0.845333, recall is 0.486880', auc is 0.869863\n","00:06:779875\n"],"name":"stdout"}]},{"metadata":{"id":"fovywo7UJG1g","colab_type":"code","colab":{}},"cell_type":"code","source":["irange_ = np.linspace(0.018889,0.027778,10)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"KItTTrtHJG1i","colab_type":"code","colab":{},"outputId":"97a855fa-c82d-448c-e6c9-223a3ac1314b"},"cell_type":"code","source":["for i in irange_:\n","    times = time()\n","    clf = SVC(kernel = \"linear\"\n","              ,gamma=\"auto\"\n","              ,cache_size = 5000\n","              ,class_weight = {1:1+i}\n","             ).fit(Xtrain, Ytrain)\n","    result = clf.predict(Xtest)\n","    score = clf.score(Xtest,Ytest)\n","    recall = recall_score(Ytest, result)\n","    auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n","    print(\"under 
ratio 1:%f testing accuracy %f, recall is %f', auc is %f\" %(1+i,score,recall,auc))\n","    print(datetime.datetime.fromtimestamp(time()-times, tz=datetime.timezone.utc).strftime(\"%M:%S:%f\")) #tz=UTC: a local offset such as +05:30 would otherwise shift %M"],"execution_count":0,"outputs":[{"output_type":"stream","text":["under ratio 1:1.018889 testing accuracy 0.844667, recall is 0.478134', auc is 0.869213\n","00:07:301489\n","under ratio 1:1.019877 testing accuracy 0.844000, recall is 0.478134', auc is 0.869228\n","00:06:855660\n","under ratio 1:1.020864 testing accuracy 0.844000, recall is 0.478134', auc is 0.869218\n","00:06:545535\n","under ratio 1:1.021852 testing accuracy 0.844667, recall is 0.478134', auc is 0.869188\n","00:06:298092\n","under ratio 1:1.022840 testing accuracy 0.844667, recall is 0.478134', auc is 0.869220\n","00:05:309544\n","under ratio 1:1.023827 testing accuracy 0.844667, recall is 0.481050', auc is 0.869188\n","00:04:701487\n","under ratio 1:1.024815 testing accuracy 0.844667, recall is 0.481050', auc is 0.869231\n","00:04:710405\n","under ratio 1:1.025803 testing accuracy 0.844000, recall is 0.481050', auc is 0.869253\n","00:05:062793\n","under ratio 1:1.026790 testing accuracy 0.844000, recall is 0.481050', auc is 0.869314\n","00:05:193115\n","under ratio 1:1.027778 testing accuracy 0.844667, recall is 0.481050', auc is 0.869374\n","00:05:308805\n"],"name":"stdout"}]},{"metadata":{"id":"LU-VGviaJG1o","colab_type":"code","colab":{}},"cell_type":"code","source":["from sklearn.linear_model import LogisticRegression as LR"],"execution_count":0,"outputs":[]},{"metadata":{"id":"wI0IRifhJG1o","colab_type":"code","colab":{},"outputId":"9a5a33c9-df8d-4bf7-f473-c82fc1c03ff3"},"cell_type":"code","source":["logclf = LR(solver=\"liblinear\").fit(Xtrain, 
Ytrain)\n","logclf.score(Xtest,Ytest)"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.8486666666666667"]},"metadata":{"tags":[]},"execution_count":148}]},{"metadata":{"id":"K0ffVyI1JG1q","colab_type":"code","colab":{}},"cell_type":"code","source":["C_range = np.linspace(5,10,10)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"yXIMfFFTJG1r","colab_type":"code","colab":{},"outputId":"b69f7c33-f3c9-4383-ff86-ca4e806498a4"},"cell_type":"code","source":["for C in C_range:\n","    logclf = LR(solver=\"liblinear\",C=C).fit(Xtrain, Ytrain)\n","    print(C,logclf.score(Xtest,Ytest))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["5.0 0.8493333333333334\n","5.555555555555555 0.8493333333333334\n","6.111111111111111 0.8486666666666667\n","6.666666666666667 0.8493333333333334\n","7.222222222222222 0.8493333333333334\n","7.777777777777778 0.8493333333333334\n","8.333333333333334 0.8493333333333334\n","8.88888888888889 0.8493333333333334\n","9.444444444444445 0.8493333333333334\n","10.0 0.8493333333333334\n"],"name":"stdout"}]},{"metadata":{"id":"j-V4Wc-1JG1t","colab_type":"code","colab":{},"outputId":"e032d7ab-0844-42e8-fe0e-b48a7aa5926d"},"cell_type":"code","source":["times = time()\n","clf = SVC(kernel = \"linear\",C=3.1663157894736838,cache_size = 5000 #NOTE(review): C looks like a tuned value from a search not shown in this section - confirm its origin\n","          ,class_weight = \"balanced\"\n","         ).fit(Xtrain, Ytrain)\n","result = clf.predict(Xtest)\n","score = clf.score(Xtest,Ytest)\n","recall = recall_score(Ytest, result)\n","auc = roc_auc_score(Ytest,clf.decision_function(Xtest))\n","print(\"testing accuracy %f,recall is %f', auc is %f\" % (score,recall,auc))\n","print(datetime.datetime.fromtimestamp(time()-times, tz=datetime.timezone.utc).strftime(\"%M:%S:%f\")) #tz=UTC: a local offset such as +05:30 would otherwise shift %M"],"execution_count":0,"outputs":[{"output_type":"stream","text":["testing accuracy 0.795333,recall is 0.772595', auc is 
0.870165\n","00:19:125873\n"],"name":"stdout"}]},{"metadata":{"id":"3Lq43kjTJG1u","colab_type":"code","colab":{}},"cell_type":"code","source":["from sklearn.metrics import roc_curve as ROC\n","import matplotlib.pyplot as plt"],"execution_count":0,"outputs":[]},{"metadata":{"id":"mtVfeCHPJG10","colab_type":"code","colab":{}},"cell_type":"code","source":["FPR, Recall, thresholds = ROC(Ytest,clf.decision_function(Xtest),pos_label=1)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"TXXxuh65JG11","colab_type":"code","colab":{}},"cell_type":"code","source":["area = roc_auc_score(Ytest,clf.decision_function(Xtest))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"aWZMxG8AJG13","colab_type":"code","colab":{},"outputId":"8ab217ae-00cc-44bc-b453-6ce3d1d4e2d0"},"cell_type":"code","source":["area"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.8701653769298805"]},"metadata":{"tags":[]},"execution_count":157}]},{"metadata":{"id":"xp7cFZIVJG15","colab_type":"code","colab":{},"outputId":"4f165510-28ae-4100-a2bf-8a9e78681f13"},"cell_type":"code","source":["plt.figure()\n","plt.plot(FPR, Recall, color='red',\n","         label='ROC curve (area = %0.2f)' % area)\n","plt.plot([0, 1], [0, 1], color='black', linestyle='--')\n","plt.xlim([-0.05, 1.05])\n","plt.ylim([-0.05, 1.05])\n","plt.xlabel('False Positive Rate')\n","plt.ylabel('Recall')\n","plt.title('Receiver operating characteristic example')\n","plt.legend(loc=\"lower 
right\")\n","plt.show()"],"execution_count":0,"outputs":[{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEWCAYAAACJ0YulAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAIABJREFUeJzt3XmcT/X+wPHXu5lCJdnqJsvYzRBTJnvWbJVoJ1fxG4RUSIt2kmuNZMQkKYpKudR1L6nkJtKUSZYw2ZeyxCBLY7x/f5wzc7/GLN9hvtvM+/l4ePiecz7fc97nzPf7fZ/P53PO54iqYowxxgBcFOgAjDHGBA9LCsYYY9JZUjDGGJPOkoIxxph0lhSMMcaks6RgjDEmnSWFfEBEuorI4kDHEWgiUl5EjolImB+3GSEiKiLh/tqmL4nIOhFpfh7vy7efQRFpLiK7Ah2Hv1hSyGMisk1ETrg/Tr+JyAwRudyX21TV91S1jS+3EYzcY31z2rSq7lDVy1U1NZBxBYqbnKpcyDpUtaaqLs1hO+ckwoL6GcyPLCn4RgdVvRyIBq4HhgQ4nvMSyLPf/HLmnRt2vE0wsKTgQ6r6G7AIJzkAICKFRGSsiOwQkd9FZIqIFPFY3lFEEkXkiIj8KiLt3PnFROQtEdkrIrtFZHhaM4mIdBeRb9zXU0RkrGccIjJfRAa5r8uIyMcisl9EtorIox7lXhKRuSIyS0SOAN0z7pMbx7vu+7eLyHMicpFHHMtF5HURSRaRX0SkVYb3ZrcPy0VkvIj8AbwkIpVF5EsROSgiB0TkPRG50i0/EygPfOrWyp7MeAYrIktF5GV3vUdFZLGIlPKI5wF3Hw6KyPMZax4Z9ruIiIxzyyeLyDeefzegq/s3PSAiz3q8r56IrBCRw+5+TxKRSzyWq4g8LCKbgc3uvNdEZKf7GfhBRG7yKB8mIs+4n42j7vJyIrLMLfKTezzuc8vf5n6eDovItyJS22Nd20TkKRFZA/wpIuGex8CNPcGN43cRedV9a9q2Drvbauj5GXTfW1NEPheRP9z3PpPFcc3y++DGttLj79lXnOatwu70R+LUxpNFZJmI1PRY7wwRmSwi/3ZjXC4ifxORCSJyyP1sXp/hWAwRkfXu8rfTtpNJzFl+h/IFVbV/efgP2Abc7L4uC/wMvOaxfAKwACgBFAU+Bf7hLqsHJAOtcRL2tUANd9k/ganAZcBVwCrgIXdZd+Ab93VTYCcg7nRx4ARQxl3nD8ALwCVAJWAL0NYt+xKQAnRyyxbJZP/eBea7sUcAm4BYjzhOAwOBi4H73P0p4eU+nAYeAcKBIkAV91gUAkrj/BhNyOxYu9MRgALh7vRS4Fegmru+pcBId1kUcAxo4h6Lse6+35zF3zXOff+1QBjQyI0rbZtvutuoA5wCIt331QUauPsUAWwABnisV4HPcT4PRdx5fwdKuu95HPgNKOwuewLnM1UdEHd7JT3WVcVj3TcA+4D6bswPuseskMfxSwTKeWw7/ZgCK4Bu7uvLgQaZHedMPoNFgb1u7IXd6fpZHNfsvg8XuX/zl4CqwCHgeo/3/p/7nkLuehI9ls0ADrjHvzDwJbAVeMA9FsOBrzJ8lta6x6IEsBwY7i5rDuzyiCnL71B++BfwAPLbP/fDdQw46n5xvgCudJcJ8CdQ2aN8Q2Cr+3oqMD6TdV6N80NTxGNel7QPdYYvpAA7gKbudC/gS/d1fWBHhnUPAd52X78ELMtm38LcOKI85j0ELPWIYw9uQnLnrQK6ebkPO7LatlumE7A6w7HOKSk857G8H/Af9/ULwGyPZZcCf5FJUnB/CE4AdTJZlrbNshn2uXMW+zAAmOcxrUDLHPb7UNq2gY1AxyzKZUwKbwAv
ZyizEWjmcfz+L5PPb1pSWAYMBUplsc9ZJYUunn+nbPYr2++Dx7b+wEmmQ7JZ15VuTMXc6RnAmx7LHwE2eExfBxzOsN99PKZvAX51Xzfnf0kh2+9Qfvhn7Yi+0UlVl4hIM+B9oBRwGOds91LgBxFJKys4P7bgnKUszGR9FXDOvPd6vO8inBrBWVRVRWQOzhdzGXA/MMtjPWVE5LDHW8KA/3pMn7NOD6Vwzo62e8zbjnP2nGa3ut8Uj+VlvNyHs7YtIlcBE4GbcM4IL8L5gcyN3zxeH8c548WNKX17qnpcRA5msY5SOGebv+Z2OyJSDXgViMH524fjnGl6yrjfjwM93RgVuMKNAZzPSHZxeKoAPCgij3jMu8Rdb6bbziAWGAb8IiJbgaGq+pkX2/U2xpy+D6jqNhH5CudHOi69kNPs+Apwj7ueM+6iUji1U4DfPbZ1IpPpjBeAeB6LtM9tRt58h0Ka9Sn4kKp+jXPGktbGfwDnw1hTVa90/xVTp1ManA9l5UxWtRPnLLuUx/uuUNWamZQFmA3cLSIVcM5sPvZYz1aPdVypqkVV9RbPsLPZpQM4TSwVPOaVB3Z7TF8rHt9wd/keL/ch47b/4c6rrapX4DSrSDblc2MvTvMe4PQZ4DTZZOYAcJLM/zY5eQP4Bajq7sMznL0P4LEfbv/BU8C9QHFVvRLnRy7tPVl9RjKzE3glw9/7UlWdndm2M1LVzaraBaepbxQwV0Quy+49uYwxp+8DInILTu3hC2CMx3vvBzoCNwPFcGoUcO6xzY1yHq/TPrcZefMdCmmWFHxvAtBaRKJV9QxO2/N49ywYEblWRNq6Zd8CeohIKxG5yF1WQ1X3AouBcSJyhbusslsTOYeqrgb2A9OARaqadlazCjjiduAVcTsta4nIjd7siDqXen4IvCIiRd2kM4j/1UTA+QF5VEQuFpF7gEhgYW73wVUUpynusIhci9Oe7ul3nDbd8zEX6CAijcTp+B1KFj8o7t9tOvCq28kY5nauFvJiO0WBI8AxEakB9PWi/Gmcv1+4iLyAU1NIMw14WUSqiqO2iKQls4zH402gj4jUd8teJiK3ikhRL+JGRP4uIqXd/U/7DKW6sZ0h62P/GfA3ERngdiQXFZH6GQvl9H0Q56KAt3BqTQ/i/L3SfnyL4pxkHMSpbYzwZp9y8LCIlBWREjjJ+4NMylzQdygUWFLwMVXdj9M5+7w76ykgCVgpzhU+S3A6DVHVVUAPYDzO2eHX/O+s/AGcqv96nCaUucA12Wx6Ns5Z1PsesaQCHXCuhtqKc6Y2DedMy1uP4LQDbwG+cdc/3WP5dzidggdwqvd3q2pas0xu92EoTmdpMvAv4JMMy/8BPCfOlTWDc7EPqOo6d1/m4NQajuJ0yp7K4i2DcTp4v8dp4x6Fd9+fwThntUdxfgAz+6HxtAj4N04H/nacGopns8arOIl5MU6yeQungxucPqF33ONxr6om4PQpTcI53klkckVZNtoB60TkGPAaTj/JSVU9jvO3Xe5uq4Hnm1T1KM4FAh1wmtU2Ay2y2EaW3wcgHpivqgvdz1AsMM1Ngu+6x2c3zudpZS72Kyvv4xzXLe6/4RkL5NF3KKilXaFizAUTke5AT1VtEuhYckucGwwP4zTzbA10PMa/RGQbzmd3SaBjCTSrKZgCS0Q6iMilbjv5WJyawLbARmVMYFlSMAVZR5zOxD04TV6d1arOpoCz5iNjjDHprKZgjDEmXcjdvFaqVCmNiIgIdBjGGBNSfvjhhwOqWjqnciGXFCIiIkhISAh0GMYYE1JEZHvOpaz5yBhjjAdLCsYYY9JZUjDGGJPOkoIxxph0lhSMMcak81lSEJHpIrJPRNZmsVxEZKKIJInIGhG5wVexGGOM8Y4vawozcEZZzEp7nKEFqgK9ccadN8YYE0A+u09BVZeJSEQ2RToC77pjzawUkStF5Bp33H1jjMkb8fHw/vs5lwtif6amsj8lhYgGDWDCBJ9uK5A3r13L
2ePE73LnnZMURKQ3Tm2C8uXL+yU4Y0wA5eUP+ddfO/83y+55TsHry0OH6LVpE8XCw0moX9/nHcGBTAqZPeUq09H5VDUe54EbxMTE2Ah+xuRHnokgL3/ImzWD+++H3r0vfF1+dPjwYZ544gmmTZtGlSpVGD9tGhf5IbEFMins4uxnopYl82eiGmOCla/O6EP0hzyvpKam0qhRIzZu3MiTTz7JSy+9RJEiRXJ+Yx4IZFJYAPQXkTk4D5dPtv4EY/KAP9vQ7Yw+Tx08eJASJUoQFhbGK6+8Qrly5YiJifFrDD5LCiIyG2gOlBKRXcCLwMUAqjoFWAjcgvN81uM4zyY2xmTF2x97f7ah2w95nlBV3nvvPR577DFGjhxJr169uOOOOwISiy+vPuqSw3IFHvbV9o0JWVn9+Hv7Y28/1CFl586d9OnTh4ULF9KgQQMaN24c0HhCbuhsY/IlbzpZ7cc+35k9ezYPPfQQqampTJgwgf79+xMWFhbQmCwpGBMIGWsD1slaIBUvXpz69esTHx9PxYoVAx0OEILPaI6JiVF7yI4JCdn1AWRWG7BEkO+dPn2a8ePH89dff/Hss88CTn+CSGZX6OctEflBVXPstbaagjF5LS0ZZNcHYLWBAuenn34iNjaWH374gXvvvTc9GfgjIeSGJQVjzpc3HcL2w1/gnTp1iuHDhzNy5EhKlCjBRx99xF133RV0ySCNJQVjzkd8PDz0kPPaOoRNNjZv3syoUaO4//77efXVVylZsmSgQ8qWJQVjsuJNn8DUqfbjb85x7Ngx5s+fT9euXalVqxa//PILlSpVCnRYXrGH7BiTUXw8NG/u1ATSfvwzatbMEoLJ1Oeff851111Ht27d2LBhA0DIJASwmoIx2V8eas1AxkuHDh1i8ODBTJ8+nWrVqvH1118TGRkZ6LByzZKCKVgyaxLKeJWQJQOTS6mpqTRu3JhNmzYxZMgQXnjhBQoXLhzosM6LJQVTsLz/PiQmQnT0/+ZZEjDn6cCBA+kD2I0YMYLy5ctzww2h/WRhSwomf8lp0Li0hLB0qd9CMvmPqjJz5kwGDBjAyJEj6d27N506dQp0WHnCkoIJfbl5OEt0tFMrMOY8bd++nYceeohFixbRqFEjmjZtGuiQ8pQlBRN6bNwgEyCzZs2ib9++qCqvv/46/fr146KL8tdFnJYUTPDLLgmk/W+JwPhB6dKlady4MVOnTqVChQqBDscnbEA8E/yaNz+3c9iSgPGDlJQUxo0bR0pKCs8//zzgvwHs8poNiGfyh/h4p2bQrJl1Dhu/Wr16NbGxsaxevZrOnTsH7QB2ec2SggkuWTUVWeew8ZOTJ08ybNgwRo8eTalSpfj444+58847Ax2W3+SvHhIT2tIGmfMcWsKGkzB+lpSUxNixY3nggQfYsGFDgUoIYDUFEwwyPn/AkoDxs2PHjjFv3jy6detGrVq12LhxY9A8Cc3fLCkY//D2KWTWgWz8bNGiRfTu3ZudO3cSExNDZGRkgU0IYEnB+JK3N5VZMjABcPDgQQYNGsS7775LjRo1+O9//xuSA9jlNUsKxnc8xxmyH34TRNIGsEtKSuLZZ5/lueeeC9kB7PKaJQWTtzxrBzbOkAky+/fvp2TJkoSFhTFq1CgqVKhAtOf9L8auPjJ5IO2hNBkfTGPjDJkgoaq8/fbbVKtWjTfffBOAjh07WkLIhNUUzPnLeNWQjT1kgtC2bdvo3bs3n3/+OTfddBMtWrQIdEhBzZKCyZ2sOo8tEZggNHPmTPr27YuIMHnyZB566KF8N4BdXrOkYLyXdnMZWK3AhISrr76apk2bMmXKFMqXLx/ocEKCJQXjHc+EYDeXmSCVkpLC6NGjSU1N5YUXXqBNmza0adMm0GGFFKtHmeyldSJbQjBB7scff+TGG2/kueeeY+PGjYTaCNDBwmoKJnOZdSJbU5EJQidOnGDo0KGMHTuW0qVLM2/evHzzaMxA8GlSEJF2wGtAGDBNVUdmWF4e
eAe40i3ztKou9GVMxoMNPWHygS1btvDqq6/SvXt3xowZQ/HixQMdUkjzWVIQkTAgDmgN7AK+F5EFqrreo9hzwIeq+oaIRAELgQhfxWSwoSdMvnDkyBE++eQTunfvTs2aNdm8eXO+fRKav/myplAPSFLVLQAiMgfoCHgmBQWucF8XA/b4MB5jVw+ZfGDhwoX06dOH3bt3U79+fSIjIy0h5CFfJoVrgZ0e07uA+hnKvAQsFpFHgMuAmzNbkYj0BnoDdlnZ+bChqU0+cODAAQYOHMisWbOIiopi+fLlNoCdD/jy6qPMnlmX8XKALsAMVS0L3ALMFJFzYlLVeFWNUdWY0qVL+yDUfC5tYDp7YI0JUWkD2M2ZM4cXXniBH3/8kQYNGgQ6rHzJlzWFXUA5j+mynNs8FAu0A1DVFSJSGCgF7PNhXAVHWg3BBqYzIer333+ndOnShIWFMXbsWCpUqEDt2rUDHVa+5suawvdAVRGpKCKXAJ2BBRnK7ABaAYhIJFAY2O/DmAoWz4RgA9OZEKKqvPXWW1SvXp34+HgAOnToYAnBD3xWU1DV0yLSH1iEc7npdFVdJyLDgARVXQA8DrwpIgNxmpa6q91xcuGshmBC2JYtW+jVqxdffvklzZo14+abM+1qND7i0/sU3HsOFmaY94LH6/VAY1/GUOBkvMLIaggmhLzzzjv069ePsLAwpkyZQq9evWwAOz+zO5rzm7R7EKxD2YSgMmXK0LJlS9544w3Kli0b6HAKJEsK+Ul8vHPZabNmlhBMSPjrr78YOXIkZ86c4aWXXqJ169a0bt060GEVaFYvy0/SagnWZGRCwPfff0/dunV58cUX2bJliw1gFyQsKeQHaSOZpt2LYLUEE8SOHz/O4MGDadCgAYcOHWLBggW8++67iGR2a5PxN2s+CnXWsWxCzNatW3n99dfp1asXo0aNolixYoEOyXiwpBDqrGPZhIDk5GQ++eQTevToQc2aNUlKSqJcuXI5v9H4nTUfhSprMjIh4l//+hc1a9akZ8+e/PLLLwCWEIKYJYVQlNZk9PXXdreyCVr79++na9eu3HbbbRQvXpwVK1ZQo0aNQIdlcmDNR6HERjs1ISI1NZUmTZqwdetWhg4dytNPP80ll1wS6LCMFywphAJ7NKYJEb/99htXXXUVYWFhjBs3joiICGrVqhXosEwuWPNRsPNsKkob+nrpUksIJqicOXOGqVOnUq1aNaZOnQrAbbfdZgkhBFlNIRhl9shMayoyQSopKYlevXqxdOlSWrZsSdu2bQMdkrkAVlMIRmkjnII9GMcEtbfffpvrrruOH3/8kTfffJMlS5ZQqVKlQIdlLoDVFIKVDXltQkD58uVp27YtcXFxXHvttYEOx+QBSwrBxnNQO2OCzKlTp/jHP/7BmTNnGDZsGK1ataJVq1aBDsvkIWs+ChZpN6OlDVlh9x6YIPPdd99Rt25dhg4dyo4dO2wAu3zKkkKwSOtHsD4EE2T+/PNPBg0aRMOGDUlOTuazzz5jxowZNoBdPmXNR8HAs8nI+hFMkNm+fTuTJ0+mT58+jBw5kiuuuCLQIRkfsqQQaJ6jnFqTkQkShw8fZu7cufTs2ZOoqCiSkpLsSWgFhDUfBZqNcmqCzPz584mKiqJPnz7pA9hZQig4LCkEAxvl1ASBffv20blzZzp16kTp0qVZuXKlDWBXAFnzkTGG1NRUGjduzI4dOxg+fDhPPvkkF198caDDMgFgSSGQ7J4EE2B79uzhb3/7G2FhYbz22mtEREQQFRUV6LBMAFnzUSCl9SdYB7PxszNnzvDGG29Qo0YNpkyZAsAtt9xiCcFYUggYz1qC9ScYP9q0aRMtWrSgX79+1K9fn/bt2wc6JBNErPnInzIb/dRqCcaP3nrrLfr370/hwoWZPn063bt3t5vQzFmspuBPNvqpCbCIiAjat2/P+vXr6dGjhyUEcw6rKfhDWg0hMdFGPzV+derUKV5++WUAhg8fbgPYmRxZTcHX
PJ+cFh1tzUXGb7799luio6N55ZVX2Lt3rw1gZ7xiScGXPIewsMdoGj85duwYjz32GE2aNOH48eP85z//4a233rKmIuMVnyYFEWknIhtFJElEns6izL0isl5E1onI+76Mx+9sCAsTADt27GDq1Kk8/PDDrF271h6PaXLFZ30KIhIGxAGtgV3A9yKyQFXXe5SpCgwBGqvqIRG5ylfxBIxdcmr84NChQ3z00Uf07t2bqKgotmzZQpkyZQIdlglBvqwp1AOSVHWLqv4FzAE6ZijTC4hT1UMAqrrPh/EYky/NmzePqKgo+vXrx8aNGwEsIZjz5sukcC2w02N6lzvPUzWgmogsF5GVItIusxWJSG8RSRCRhP379/so3DyWdnOaMT7y22+/cc8993DnnXfyt7/9jVWrVlG9evVAh2VCnC8vSc2sVyvj5Q/hQFWgOVAW+K+I1FLVw2e9STUeiAeIiYkJjUsobAgL40OpqancdNNN7Ny5kxEjRjB48GAbwM7kCV8mhV1AOY/pssCeTMqsVNUUYKuIbMRJEt/7MC7fsyEsjI/s2rWLMmXKEBYWxsSJE6lYsaINb23ylC+bj74HqopIRRG5BOgMLMhQ5p9ACwARKYXTnLTFhzH5h9USTB47c+YMr7/+OjVq1OCNN94AoH379pYQTJ7zWVJQ1dNAf2ARsAH4UFXXicgwEbndLbYIOCgi64GvgCdU9aCvYvILqyWYPPbLL7/QtGlTHn30UZo0acJtt90W6JBMPubTYS5UdSGwMMO8FzxeKzDI/Rf67HnLJo9NmzaN/v37c+mll/LOO+/QrVs3uwnN+JSNfZSX7GY1k8cqV65Mhw4dmDRpEldffXWgwzEFgITaeCgxMTGakJAQ6DDOZgPemTxy8uRJhg0bBsCIESMCHI3JT0TkB1WNyalctjUFEcm2WUdVX81tYPlKWjJIux+hWTNrNjLnbfny5cTGxrJx40Z69uyJqlpTkfG7nJqPivolilCVVjtISwbWZGTOw9GjR3nmmWeIi4ujQoUKLFq0iDZt2gQ6LFNAZZsUVHWovwIJOZ5XGVlzkbkAu3btYtq0aTzyyCO88sorXH755YEOyRRgOTUfTcxuuao+mrfhhBC7F8FcgIMHD/Lhhx/St29fIiMj2bJlC9dcc02gwzImx+ajH/wSRaixexHMeVJVPv74Yx5++GH++OMPWrZsSfXq1S0hmKCRU/PRO/4KJKRYLcGch7179/Lwww8zb9486taty+LFi20AOxN0vLpPQURKA08BUUDhtPmq2tJHcQUvqyWY85A2gN3u3bsZPXo0AwcOJDzcbhMywcfbT+V7wAfArUAf4EEgRMawzkN2x7LJpZ07d3LttdcSFhZGXFwcFStWpFq1aoEOy5gseTv2UUlVfQtIUdWvVfX/gAY+jCs42R3LxkupqalMnDjxrAHs2rZtawnBBD1vawop7v97ReRWnCGwy/ompCBnzUYmBxs2bCA2NpYVK1bQvn17OnToEOiQjPGat0lhuIgUAx4HXgeuAAb6LCpjQlR8fDyPPPIIRYsWZebMmXTt2tXuSjYhxaukoKqfuS+TcZ9/YIw5V9WqVbnjjjuYOHEiV111VaDDMSbXvOpTEJF3RORKj+niIjLdd2EFIXvmssnEiRMneOqpp3j66acBaNGiBXPmzLGEYEKWtx3NtT2fm6yqh4DrfRNSkLJ7E0wGy5Yto06dOowePZrk5GRCbcRhYzLjbVK4SESKp02ISAkK4rMYrJPZAEeOHKFfv340a9aM1NRUvvjiC9544w3rOzD5grc/7OOAb0VkLqDAvcArPovKmCC2Z88eZsyYwaBBgxg2bBiXXXZZoEMyJs9429H8rogkAC0BAe5U1fU+jcyYIHLgwAE+/PBD+vXrR40aNdi6das9Cc3kS942HwGUAP5U1deB/SJS0UcxGRM0VJUPPviAqKgoBgwYwKZNmwAsIZh8y9urj17EGftoiDvrYmCWr4IyJhjs2bOHTp06
0blzZypUqMAPP/xgdySbfM/bPoU7cK42+hFAVfeIiD2VzeRbqampNG3alN27dzN27Fgee+wxG8DOFAjefsr/UlUVEQUQEetZM/nS9u3bKVu2LGFhYUyePJlKlSpRpUqVQIdljN9426fwoYhMBa4UkV7AEmCa78IKMnbjWr6XmprKq6++SmRkZPoAdm3atLGEYAocb68+GisirYEjQHXgBVX93KeRBRO7cS1fW7t2LbGxsaxatYrbbruNTp06BTokYwLG60ZSNwl8DiAiYSLSVVXf81lkwcZuXMuXpkyZwqOPPkqxYsV4//336dy5s92EZgq0bJuPROQKERkiIpNEpI04+gNbcG5gMyYkpQ1JERkZyT333MP69evp0qWLJQRT4OVUU5gJHAJWAD2BJ4BLgI6qmujj2IzJc8ePH+eFF14gLCyMUaNG0axZM5o1axbosIwJGjl1NFdS1e6qOhXoAsQAtxWohGCdzPnG0qVLqV27NuPGjePYsWM2gJ0xmcgpKaQ9cQ1VTQW2qupR34YUZKyTOeQlJyfz0EMP0aKF8yiQL7/8kri4OGsqMiYTOTUf1RGRI+5rAYq40wKoql7h0+iChXUyh7S9e/cya9YsBg8ezNChQ7n00ksDHZIxQSvbmoKqhqnqFe6/oqoa7vE6x4QgIu1EZKOIJInI09mUu1tEVERizmcnjMlo//79vP766wDUqFGDbdu2MWbMGEsIxuQgNwPi5YqIhAFxQHsgCugiIlGZlCsKPAp856tYzpv1J4QcVeX9998nMjKSxx9/PH0Au9KlSwc4MmNCg8+SAlAPSFLVLar6FzAH6JhJuZeB0cBJH8aSe/Hx8NBDzmvrTwgJO3fupEOHDnTt2pUqVaqwevVqG8DOmFzyZVK4FtjpMb3LnZdORK4HyqnqZ9mtSER6i0iCiCTs378/7yPNyDMhTJ1q/Qkh4PTp0zRv3pyvvvqK8ePHs3z5cmrWrBnosIwJOb4c9jGzSzvSrwEUkYuA8UD3nFakqvFAPEBMTIzvryNMu+LIEkLQ27ZtG+XKlSM8PJypU6dSqVIlKlWqFOiwjAlZvqwp7ALKeUyXBfZ4TBcFagFLRWQb0ABYEPDO5rR+BLviKKidPn2asWPHEhkZyeTJkwG4+eabLSEYc4F8WVP4HqjqPqFtN9AZSG+cV9VkoFTatIgsBQaraoIPY8qZ3ZcQ9NasWUNsbCwJCQl07NiRu+66K9AuEEM3AAAbB0lEQVQhGZNv+KymoKqngf7AImAD8KGqrhORYSJyu6+2myeslhC0Jk+eTN26ddm+fTsffPAB8+bNo0yZMoEOy5h8w6ePklLVhcDCDPNeyKJsc1/GkqP4eKeWkJgI0dEBDcWcS1UREWrVqkXnzp0ZP348pUqVyvmNxphcsecLwtlXGzVrZk1HQeTPP//kueeeIzw8nDFjxtC0aVOaNm0a6LCMybd82dEcOjyvNlq61JqOgsQXX3zBddddx4QJEzh16pQNYGeMH1hSSGP9CEHj8OHD9OzZk5tvvpnw8HCWLVvGxIkTbQA7Y/zAkoIJOr///jtz5szhqaee4qeffuKmm24KdEjGFBjWp2CCQloieOyxx6hevTrbtm2zjmRjAsBqCiagVJVZs2YRFRXFk08+yebNmwEsIRgTIJYUbCTUgNmxYwe33nor3bp1o3r16iQmJlK1atVAh2VMgWbNR3YHc0CkDWC3b98+Jk6cSL9+/QgLCwt0WMYUeJYUwK488qMtW7ZQoUIFwsPDefPNN6lcuTIRERGBDssY47LmI+MXp0+fZtSoUURFRREXFwdAq1atLCEYE2QKdlKw/gS/SExMpH79+jz99NPccsst3HPPPYEOyRiThYKdFKw/wecmTZrEjTfeyO7du5k7dy6ffPIJ11xzTaDDMsZkoWAnBbD+BB9JG5Kidu3adO3alfXr19sQ18aEAOtoNnnq2LFjPPvss1x88cWMHTvWBrAzJsQU3JqC9SfkucWLF1OrVi1e
f/11UlJSbAA7Y0JQwU0K1p+QZw4dOkSPHj1o27YthQsXZtmyZbz22ms2gJ0xIajgJgWw/oQ8sm/fPubOncuQIUNITEykSZMmgQ7JGHOerE/BnJfffvuN2bNnM3DgwPQB7EqWLBnosIwxF6hg1xRMrqkq77zzDlFRUQwZMiR9ADtLCMbkD5YUjNe2bdtGu3bt6N69O1FRUTaAnTH5kDUfGa+cPn2aFi1acODAAeLi4ujTpw8XXWTnFMbkN5YUTLaSkpKoWLEi4eHhTJ8+nUqVKlGhQoVAh2WM8ZGCeapn9yjkKCUlhREjRlCzZs30AexatGhhCcGYfK5g1hTsHoVs/fjjj8TGxpKYmMg999zDfffdF+iQjDF+UjBrCmD3KGRh4sSJ1KtXj99++41PPvmEDz/8kKuvvjrQYRlj/KTgJgVzlrQhKa6//noeeOAB1q9fzx133BHgqIwx/lYwm49MuqNHjzJkyBAKFSrEuHHjuOmmm7jpppsCHZYxJkCsplCA/ec//6FWrVpMnjwZVbUB7IwxlhQKooMHD/Lggw/Svn17LrvsMpYvX86rr75qA9gZYwpgUrDLUTl48CDz5s3j+eefZ/Xq1TRs2DDQIRljgoRPk4KItBORjSKSJCJPZ7J8kIisF5E1IvKFiPj+IvgCejnq3r17GTt2LKpKtWrV2L59O8OGDaNQoUKBDs0YE0R8lhREJAyIA9oDUUAXEYnKUGw1EKOqtYG5wGhfxXOWAnQ5qqoyffp0IiMjef7550lKSgKgePHiAY7MGBOMfFlTqAckqeoWVf0LmAN09Cygql+p6nF3ciVQ1ofxFDhbt26lTZs2xMbGUqdOHX766ScbwM4Yky1fXpJ6LbDTY3oXUD+b8rHAvzNbICK9gd4A5cuXz6v48rXTp0/TsmVLDh48yBtvvEHv3r1tADtjTI58mRQyu5Ql02seReTvQAzQLLPlqhoPxAPExMTYdZPZ2Lx5M5UqVSI8PJy3336bypUrU65cuUCHZYwJEb48ddwFeP4alQX2ZCwkIjcDzwK3q+opH8aTr6WkpDB8+HBq1arFpEmTAGjevLklBGNMrvgyKXwPVBWRiiJyCdAZWOBZQESuB6biJIR9PozFkU8vR01ISCAmJobnn3+eO++8ky5dugQ6JGNMiPJZUlDV00B/YBGwAfhQVdeJyDARud0tNga4HPhIRBJFZEEWq8sb+fBy1Ndee4369etz4MAB5s+fz+zZs7nqqqsCHZYxJkT5dOwjVV0ILMww7wWP1zf7cvuZyieXo6oqIkJMTAyxsbGMHj2aK6+8MtBhGWNCnA2IF2KOHDnCU089ReHChRk/fjyNGzemcePGgQ7LGJNP2DWKIWThwoXUrFmT+Ph4wsPDbQA7Y0yes6QQAg4cOMDf//53br31VooVK8a3337LmDFjbAA7Y0yes6QQAg4dOsSnn37Kiy++yI8//kj9+tndA2iMMefP+hSC1O7du3nvvfd44oknqFq1Ktu3b7eOZGOMz1lNIcioKm+++SZRUVG89NJL/PrrrwCWEIwxfmFJIYj8+uuvtGrVit69e3PDDTewZs0aqlSpEuiwjDEFiDUfBYnTp0/TqlUr/vjjD6ZOnUrPnj1tADtjjN9ZUgiwjRs3UrlyZcLDw3nnnXeoXLkyZcvaCOLGmMCwU9EA+euvvxg6dCjXXXcdcXFxADRr1swSgjEmoKymEACrVq0iNjaWtWvXcv/999O1a9dAh2SMMYDVFPxuwoQJNGzYMP3eg/fee49SpUoFOixjjAEsKfhN2pAU9erVo1evXqxbt47bbrstwFEZY8zZrPnIx5KTk3nyyScpUqQIEyZMoFGjRjRq1CjQYRljTKaspuBDn376KVFRUUybNo1ChQrZAHbGmKBnScEH9u/fz/3338/tt99OyZIlWblyJaNGjbIB7IwxQc+Sgg8kJyezcOFChg4dSkJCAjfeeGOgQzLGGK9Yn0Ie2blzJ7NmzeLpp5+mSpUqbN++
nWLFigU6LGOMyRWrKVygM2fOMGXKFGrWrMnw4cPTB7CzhGCMCUWWFC7A5s2badmyJX379qVevXr8/PPPNoCdMSakWfPReTp9+jStW7fm8OHDvPXWW/To0cM6ko0xIc+SQi5t2LCBqlWrEh4ezsyZM6lcuTJlypQJdFgmCKWkpLBr1y5OnjwZ6FBMAVK4cGHKli3LxRdffF7vt6TgpVOnTjFixAhGjBjBmDFjGDBgADfddFOgwzJBbNeuXRQtWpSIiAirRRq/UFUOHjzIrl27qFix4nmtw5KCF1auXElsbCzr16+nW7dudOvWLdAhmRBw8uRJSwjGr0SEkiVLsn///vNeh3U052DcuHE0atSIo0ePsnDhQt59911KliwZ6LBMiLCEYPztQj9zlhSycObMGQAaNmxInz59WLt2Le3btw9wVMYY41sFJynEx8PXX+dY7PDhw8TGxvLYY48B0KhRIyZPnswVV1zh6wiNyXNhYWFER0dTq1YtOnTowOHDh9OXrVu3jpYtW1KtWjWqVq3Kyy+/fNb4XP/+97+JiYkhMjKSGjVqMHjw4EDsQrZWr15Nz549Ax1Gtv7xj39QpUoVqlevzqJFizIt88UXX3DDDTcQHR1NkyZNSEpKAmDgwIFER0cTHR1NtWrVuPLKKwFnKJ127dr5JmBVDal/devW1fPSrJkqqE6dmmWRefPm6TXXXKNhYWE6ZMgQPXPmzPltyxhVXb9+faBD0Msuuyz99QMPPKDDhw9XVdXjx49rpUqVdNGiRaqq+ueff2q7du100qRJqqr6888/a6VKlXTDhg2qqpqSkqJxcXF5GltKSsoFr+Puu+/WxMREv24zN9atW6e1a9fWkydP6pYtW7RSpUp6+vTpc8pVrVo1/fMSFxenDz744DllJk6cqD169Eif7t69u37zzTeZbjezzx6QoF78xhasjuZmzaB373Nm79u3j/79+/PRRx8RHR3NZ599xg033BCAAE2+NWAAJCbm7Tqjo2HCBK+LN2zYkDVr1gDw/vvv07hxY9q0aQPApZdeyqRJk2jevDkPP/wwo0eP5tlnn6VGjRoAhIeH069fv3PWeezYMR555BESEhIQEV588UXuuusuLr/8co4dOwbA3Llz+eyzz5gxYwbdu3enRIkSrF69mujoaObNm0diYmL6GXCVKlVYvnw5F110EX369GHHjh2A83Cqxo0bn7Xto0ePsmbNGurUqQM4TzQcMGAAJ06coEiRIrz99ttUr16dGTNm8K9//YuTJ0/y559/8uWXXzJmzBg+/PBDTp06xR133MHQoUMB6NSpEzt37uTkyZM89thj9M7k9yI35s+fT+fOnSlUqBAVK1akSpUqrFq1ioYNG55VTkQ4cuQI4Iydltll7rNnz06PMy3W995775zjcqEKVlLIwpEjR/j888955ZVXeOKJJ877+l5jglVqaipffPEFsbGxgNN0VLdu3bPKVK5cmWPHjnHkyBHWrl3L448/nuN6X375ZYoVK8bPP/8MwKFDh3J8z6ZNm1iyZAlhYWGcOXOGefPm0aNHD7777jsiIiK4+uqruf/++xk4cCBNmjRhx44dtG3blg0bNpy1noSEBGrVqpU+XaNGDZYtW0Z4eDhLlizhmWee4eOPPwZgxYoVrFmzhhIlSrB48WI2b97MqlWrUFVuv/12li1bRtOmTZk+fTolSpTgxIkT3Hjjjdx1113nXFgycOBAvvrqq3P2q3Pnzjz99NNnzdu9ezcNGjRIny5btiy7d+8+573Tpk3jlltuoUiRIlxxxRWsXLnyrOXbt29n69attGzZMn1eTEwMzz33XE6HO9cKbFLYsWMHM2fO5JlnnqFKlSrs2LGDokWLBjosk1/l4ow+L504cYLo6Gi2bdtG3bp1ad26NeA0G2d1lUpurl5ZsmQJc+bMSZ8uXrx4ju+55557CAsLA+C+++5j2LBh9OjRgzlz5nDfffelr3f9+vXp7zly5AhHjx496zu6d+9e
SpcunT6dnJzMgw8+yObNmxERUlJS0pe1bt2aEiVKALB48WIWL17M9ddfDzi1nc2bN9O0aVMmTpzIvHnzAGeQy82bN5+TFMaPH+/dweF/T1z0lNnxHT9+PAsXLqR+/fqMGTOGQYMGMW3atPTlc+bM4e67704/bgBXXXUVe/bs8ToWb/m0o1lE2onIRhFJEpGnM1leSEQ+cJd/JyIRvowHnKuKJk+eTM2aNRkxYkT6AHaWEEx+VKRIERITE9m+fTt//fUXcXFxANSsWZOEhISzym7ZsoXLL7+cokWLUrNmTX744Ycc159VcvGcl/GO7ssuuyz9dcOGDUlKSmL//v3885//5M477wSc7+mKFStITEwkMTGR3bt3n/MdLVKkyFnrfv7552nRogVr167l008/PWuZ5zZVlSFDhqSvOykpidjYWJYuXcqSJUtYsWIFP/30E9dff32md6N7dv56/hs5cuQ5ZcuWLcvOnTvTp3ft2nVO09D+/fv56aefqF+/PuAkym+//fasMnPmzKFLly5nzTt58iRFihQ5Z5sXymdJQUTCgDigPRAFdBGRqAzFYoFDqloFGA+M8lU8ABuPH09vM23YsCHr1q2zAexMgVCsWDEmTpzI2LFjSUlJoWvXrnzzzTcsWbIEcGoUjz76KE8++SQATzzxBCNGjGDTpk2A8yP96quvnrPeNm3aMGnSpPTptOajq6++mg0bNqQ3D2VFRLjjjjsYNGgQkZGR6WflGdebmEl/TGRkZPpVOuDUFK699loAZsyYkeU227Zty/Tp09P7PHbv3s2+fftITk6mePHiXHrppfzyyy/nNOGkGT9+fHpC8fyXsekI4Pbbb2fOnDmcOnWKrVu3snnzZurVq3dWmeLFi5OcnJx+rD///HMiIyPTl2/cuJFDhw6d0w+xadOms5rP8oovawr1gCRV3aKqfwFzgI4ZynQE3nFfzwVaiY/u9jmtSts1a/j55595++23WbRoEREREb7YlDFB6frrr6dOnTrMmTOHIkWKMH/+fIYPH0716tW57rrruPHGG+nfvz8AtWvXZsKECXTp0oXIyEhq1arF3r17z1nnc889x6FDh6hVqxZ16tRJb2sfOXIkt912Gy1btuSaa67JNq777ruPWbNmpTcdAUycOJGEhARq165NVFQUU6ZMOed9NWrUIDk5maNHjwLw5JNPMmTIEBo3bkxqamqW22vTpg33338/DRs25LrrruPuu+/m6NGjtGvXjtOnT1O7dm2ef/75s/oCzlfNmjW59957iYqKol27dsTFxaU3Ad1yyy3s2bOH8PBw3nzzTe666y7q1KnDzJkzGTNmTPo6Zs+eTefOnc+pkX311VfceuutFxxjRpJZm1eerFjkbqCdqvZ0p7sB9VW1v0eZtW6ZXe70r26ZAxnW1RvoDVC+fPm627dvz31AAwbwze7dVJ44MccPqTF5YcOGDWed8Zm8N378eIoWLRr09yr4QtOmTZk/f36m/TiZffZE5AdVjclpvb6sKWR2xp8xA3lTBlWNV9UYVY3x7FjKlQkTaPLRR5YQjMlH+vbtS6FChQIdht/t37+fQYMGedWxn1u+TAq7gHIe02WBjF3l6WVEJBwoBvzhw5iMMflI4cKFC+QAlaVLl6ZTp04+Wbcvk8L3QFURqSgilwCdgQUZyiwAHnRf3w18qb5qzzImAOzjbPztQj9zPksKqnoa6A8sAjYAH6rqOhEZJiK3u8XeAkqKSBIwCDi3+96YEFW4cGEOHjxoicH4jbrPUyhcuPB5r8NnHc2+EhMToxmvrzYmGNmT10wgZPXkNW87mgvsHc3G+NrFF1983k+/MiZQCs7Q2cYYY3JkScEYY0w6SwrGGGPShVxHs4jsB87jlmYASgEHciyVv9g+Fwy2zwXDhexzBVXN8e7fkEsKF0JEErzpfc9PbJ8LBtvngsEf+2zNR8YYY9JZUjDGGJOuoCWF+EAHEAC2zwWD7XPB4PN9LlB9CsYYY7JX0GoK
xhhjsmFJwRhjTLp8mRREpJ2IbBSRJBE5Z+RVESkkIh+4y78TkQj/R5m3vNjnQSKyXkTWiMgXIlIhEHHmpZz22aPc3SKiIhLyly96s88icq/7t14nIu/7O8a85sVnu7yIfCUiq93P9y2BiDOviMh0EdnnPpkys+UiIhPd47FGRG7I0wBUNV/9A8KAX4FKwCXAT0BUhjL9gCnu687AB4GO2w/73AK41H3dtyDss1uuKLAMWAnEBDpuP/ydqwKrgeLu9FWBjtsP+xwP9HVfRwHbAh33Be5zU+AGYG0Wy28B/o3z5MoGwHd5uf38WFOoBySp6hZV/QuYA3TMUKYj8I77ei7QSjI+FTu05LjPqvqVqh53J1fiPAkvlHnzdwZ4GRgN5Ifxq73Z515AnKoeAlDVfX6OMa95s88KXOG+Lsa5T3gMKaq6jOyfQNkReFcdK4ErRSTPnjOcH5PCtcBOj+ld7rxMy6jzMKBkoKRfovMNb/bZUyzOmUYoy3GfReR6oJyqfubPwHzIm79zNaCaiCwXkZUi0s5v0fmGN/v8EvB3EdkFLAQe8U9oAZPb73uu5MfnKWR2xp/xultvyoQSr/dHRP4OxADNfBqR72W7zyJyETAe6O6vgPzAm79zOE4TUnOc2uB/RaSWqh72cWy+4s0+dwFmqOo4EWkIzHT3+YzvwwsIn/5+5ceawi6gnMd0Wc6tTqaXEZFwnCpndtW1YOfNPiMiNwPPArer6ik/xeYrOe1zUaAWsFREtuG0vS4I8c5mbz/b81U1RVW3AhtxkkSo8mafY4EPAVR1BVAYZ+C4/Mqr7/v5yo9J4XugqohUFJFLcDqSF2QoswB40H19N/Cluj04ISrHfXabUqbiJIRQb2eGHPZZVZNVtZSqRqhqBE4/yu2qGsrPcvXms/1PnIsKEJFSOM1JW/waZd7yZp93AK0ARCQSJyns92uU/rUAeMC9CqkBkKyqe/Nq5fmu+UhVT4tIf2ARzpUL01V1nYgMAxJUdQHwFk4VMwmnhtA5cBFfOC/3eQxwOfCR26e+Q1VvD1jQF8jLfc5XvNznRUAbEVkPpAJPqOrBwEV9Ybzc58eBN0VkIE4zSvdQPskTkdk4zX+l3H6SF4GLAVR1Ck6/yS1AEnAc6JGn2w/hY2eMMSaP5cfmI2OMMefJkoIxxph0lhSMMcaks6RgjDEmnSUFY4wx6SwpmKAjIqkikujxLyKbshFZjSaZy20udUfi/MkdIqL6eayjj4g84L7uLiJlPJZNE5GoPI7zexGJ9uI9A0Tk0gvdtikYLCmYYHRCVaM9/m3z03a7qmodnMESx+T2zao6RVXfdSe7A2U8lvVU1fV5EuX/4pyMd3EOACwpGK9YUjAhwa0R/FdEfnT/NcqkTE0RWeXWLtaISFV3/t895k8VkbAcNrcMqOK+t5U7Tv/P7jj3hdz5I+V/z6cY6857SUQGi8jdOONLvedus4h7hh8jIn1FZLRHzN1F5PXzjHMFHgOhicgbIpIgznMUhrrzHsVJTl+JyFfuvDYissI9jh+JyOU5bMcUIJYUTDAq4tF0NM+dtw9orao3APcBEzN5Xx/gNVWNxvlR3uUOe3Af0Nidnwp0zWH7HYCfRaQwMAO4T1WvwxkBoK+IlADuAGqqam1guOebVXUukIBzRh+tqic8Fs8F7vSYvg/44DzjbIczrEWaZ1U1BqgNNBOR2qo6EWdcnBaq2sId+uI54Gb3WCYAg3LYjilA8t0wFyZfOOH+MHq6GJjktqGn4ozpk9EK4FkRKQt8oqqbRaQVUBf43h3eowhOgsnMeyJyAtiGM/xydWCrqm5yl78DPAxMwnk+wzQR+Rfg9dDcqrpfRLa4Y9Zsdrex3F1vbuK8DGfYB8+nbt0rIr1xvtfX4DxwZk2G9zZw5y93t3MJznEzBrCkYELHQOB3oA5ODfech+ao6vsi8h1wK7BIRHriDDP8jqoO8WIbXT0HzBORTJ+x4Y7HUw9nELbOQH+gZS725QPg
XuAXYJ6qqji/0F7HifMEspFAHHCniFQEBgM3quohEZmBMzBcRgJ8rqpdchGvKUCs+ciEimLAXneM/G44Z8lnEZFKwBa3yWQBTjPKF8DdInKVW6aEeP986l+ACBGp4k53A7522+CLqepCnE7czK4AOoozfHdmPgE64TwH4AN3Xq7iVNUUnGagBm7T0xXAn0CyiFwNtM8ilpVA47R9EpFLRSSzWpcpoCwpmFAxGXhQRFbiNB39mUmZ+4C1IpII1MB5ZOF6nB/PxSKyBvgcp2klR6p6EmcEyo9E5GfgDDAF5wf2M3d9X+PUYjKaAUxJ62jOsN5DwHqggqqucuflOk63r2IcMFhVf8J5NvM6YDpOk1SaeODfIvKVqu7HuTJqtrudlTjHyhjARkk1xhjjwWoKxhhj0llSMMYYk86SgjHGmHSWFIwxxqSzpGCMMSadJQVjjDHpLCkYY4xJ9//o3Th1AbKzHQAAAABJRU5ErkJggg==\n","text/plain":["<Figure size 432x288 with 1 Axes>"]},"metadata":{"tags":[],"needs_background":"light"}}]},{"metadata":{"id":"TrUqcwHFJG16","colab_type":"code","colab":{}},"cell_type":"code","source":["maxindex = (Recall - FPR).tolist().index(max(Recall - FPR))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"ogzsCxPEJG19","colab_type":"code","colab":{},"outputId":"7f8092bc-3220-4237-f2c9-32d1787b70d0"},"cell_type":"code","source":["thresholds[maxindex]"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["-0.08950517388953827"]},"metadata":{"tags":[]},"execution_count":160}]},{"metadata":{"id":"0zmjb1dwJG1-","colab_type":"code","colab":{}},"cell_type":"code","source":["from sklearn.metrics import accuracy_score as AC"],"execution_count":0,"outputs":[]},{"metadata":{"id":"rejIQnMrJG1_","colab_type":"code","colab":{}},"cell_type":"code","source":["clf = SVC(kernel = \"linear\",C=3.1663157894736838,cache_size = 5000\n","          ,class_weight = \"balanced\"\n","         ).fit(Xtrain, Ytrain)"],"execution_count":0,"outputs":[]},{"metadata":{"id":"qdDsbsn6JG1_","colab_type":"code","colab":{}},"cell_type":"code","source":["prob = 
pd.DataFrame(clf.decision_function(Xtest))"],"execution_count":0,"outputs":[]},{"metadata":{"id":"j7JIPvgGJG2A","colab_type":"code","colab":{},"outputId":"f6fa04b7-300a-48ad-c9ca-9b8f59011ab6"},"cell_type":"code","source":["prob.head()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>0</th>\n","      <th>y_pred</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>2.189193</td>\n","      <td>1.0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>0.373116</td>\n","      <td>1.0</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>-0.015488</td>\n","      <td>1.0</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>-1.136262</td>\n","      <td>0.0</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>-0.240851</td>\n","      <td>0.0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["          0  y_pred\n","0  2.189193     1.0\n","1  0.373116     1.0\n","2 -0.015488     1.0\n","3 -1.136262     0.0\n","4 -0.240851     0.0"]},"metadata":{"tags":[]},"execution_count":166}]},{"metadata":{"id":"FsyOy9jXJG2B","colab_type":"code","colab":{}},"cell_type":"code","source":["prob.loc[prob.iloc[:,0] >= thresholds[maxindex],\"y_pred\"]=1\n","prob.loc[prob.iloc[:,0] < 
thresholds[maxindex],\"y_pred\"]=0"],"execution_count":0,"outputs":[]},{"metadata":{"id":"pZwQ6CpeJG2G","colab_type":"code","colab":{},"outputId":"8f084244-d019-47f1-b5e9-c87242a9611f"},"cell_type":"code","source":["prob.loc[:,\"y_pred\"].isnull().sum()"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0"]},"metadata":{"tags":[]},"execution_count":167}]},{"metadata":{"id":"ox_gJ6dZJG2H","colab_type":"code","colab":{},"outputId":"2700c7ae-d923-4844-b03d-35d5bc22deff"},"cell_type":"code","source":["times = time()\n","score = AC(Ytest,prob.loc[:,\"y_pred\"].values)\n","recall = recall_score(Ytest, prob.loc[:,\"y_pred\"])\n","print(\"testing accuracy %f,recall is %f\" % (score,recall))\n","print(datetime.datetime.fromtimestamp(time()-times).strftime(\"%M:%S:%f\"))"],"execution_count":0,"outputs":[{"output_type":"stream","text":["testing accuracy 0.789333,recall is 0.804665\n","00:00:005985\n"],"name":"stdout"}]},{"metadata":{"id":"ch5GSJ_LJG2I","colab_type":"code","colab":{}},"cell_type":"code","source":[""],"execution_count":0,"outputs":[]}]}